jscript: Store concatenated strings as a rope string to avoid useless copying.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
35 WINE_DECLARE_DEBUG_CHANNEL(d3d);
36
37 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
38         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
39         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
40 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
41         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
42         enum wined3d_texture_filter_type filter);
43
44 static void surface_cleanup(struct wined3d_surface *surface)
45 {
46     struct wined3d_surface *overlay, *cur;
47
48     TRACE("surface %p.\n", surface);
49
50     if (surface->texture_name || (surface->flags & SFLAG_PBO)
51              || surface->rb_multisample || surface->rb_resolved
52              || !list_empty(&surface->renderbuffers))
53     {
54         struct wined3d_renderbuffer_entry *entry, *entry2;
55         const struct wined3d_gl_info *gl_info;
56         struct wined3d_context *context;
57
58         context = context_acquire(surface->resource.device, NULL);
59         gl_info = context->gl_info;
60
61         if (surface->texture_name)
62         {
63             TRACE("Deleting texture %u.\n", surface->texture_name);
64             gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name);
65         }
66
67         if (surface->flags & SFLAG_PBO)
68         {
69             TRACE("Deleting PBO %u.\n", surface->pbo);
70             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
71         }
72
73         if (surface->rb_multisample)
74         {
75             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
76             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
77         }
78
79         if (surface->rb_resolved)
80         {
81             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
82             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
83         }
84
85         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
86         {
87             TRACE("Deleting renderbuffer %u.\n", entry->id);
88             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
89             HeapFree(GetProcessHeap(), 0, entry);
90         }
91
92         context_release(context);
93     }
94
95     if (surface->flags & SFLAG_DIBSECTION)
96     {
97         DeleteDC(surface->hDC);
98         DeleteObject(surface->dib.DIBsection);
99         surface->dib.bitmap_data = NULL;
100         surface->resource.allocatedMemory = NULL;
101     }
102
103     if (surface->flags & SFLAG_USERPTR)
104         wined3d_surface_set_mem(surface, NULL);
105     if (surface->overlay_dest)
106         list_remove(&surface->overlay_entry);
107
108     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
109     {
110         list_remove(&overlay->overlay_entry);
111         overlay->overlay_dest = NULL;
112     }
113
114     resource_cleanup(&surface->resource);
115 }
116
117 void surface_update_draw_binding(struct wined3d_surface *surface)
118 {
119     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
120         surface->draw_binding = SFLAG_INDRAWABLE;
121     else if (surface->resource.multisample_type)
122         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
123     else
124         surface->draw_binding = SFLAG_INTEXTURE;
125 }
126
127 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
128 {
129     TRACE("surface %p, container %p.\n", surface, container);
130
131     if (!container && type != WINED3D_CONTAINER_NONE)
132         ERR("Setting NULL container of type %#x.\n", type);
133
134     if (type == WINED3D_CONTAINER_SWAPCHAIN)
135     {
136         surface->get_drawable_size = get_drawable_size_swapchain;
137     }
138     else
139     {
140         switch (wined3d_settings.offscreen_rendering_mode)
141         {
142             case ORM_FBO:
143                 surface->get_drawable_size = get_drawable_size_fbo;
144                 break;
145
146             case ORM_BACKBUFFER:
147                 surface->get_drawable_size = get_drawable_size_backbuffer;
148                 break;
149
150             default:
151                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
152                 return;
153         }
154     }
155
156     surface->container.type = type;
157     surface->container.u.base = container;
158     surface_update_draw_binding(surface);
159 }
160
161 struct blt_info
162 {
163     GLenum binding;
164     GLenum bind_target;
165     enum tex_types tex_type;
166     GLfloat coords[4][3];
167 };
168
/* Rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
176
177 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
178 {
179     f->l = ((r->left * 2.0f) / w) - 1.0f;
180     f->t = ((r->top * 2.0f) / h) - 1.0f;
181     f->r = ((r->right * 2.0f) / w) - 1.0f;
182     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
183 }
184
185 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
186 {
187     GLfloat (*coords)[3] = info->coords;
188     struct float_rect f;
189
190     switch (target)
191     {
192         default:
193             FIXME("Unsupported texture target %#x\n", target);
194             /* Fall back to GL_TEXTURE_2D */
195         case GL_TEXTURE_2D:
196             info->binding = GL_TEXTURE_BINDING_2D;
197             info->bind_target = GL_TEXTURE_2D;
198             info->tex_type = tex_2d;
199             coords[0][0] = (float)rect->left / w;
200             coords[0][1] = (float)rect->top / h;
201             coords[0][2] = 0.0f;
202
203             coords[1][0] = (float)rect->right / w;
204             coords[1][1] = (float)rect->top / h;
205             coords[1][2] = 0.0f;
206
207             coords[2][0] = (float)rect->left / w;
208             coords[2][1] = (float)rect->bottom / h;
209             coords[2][2] = 0.0f;
210
211             coords[3][0] = (float)rect->right / w;
212             coords[3][1] = (float)rect->bottom / h;
213             coords[3][2] = 0.0f;
214             break;
215
216         case GL_TEXTURE_RECTANGLE_ARB:
217             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
218             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
219             info->tex_type = tex_rect;
220             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
221             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
222             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
223             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
224             break;
225
226         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
227             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
228             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
229             info->tex_type = tex_cube;
230             cube_coords_float(rect, w, h, &f);
231
232             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
233             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
234             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
235             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
236             break;
237
238         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
239             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
240             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
241             info->tex_type = tex_cube;
242             cube_coords_float(rect, w, h, &f);
243
244             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
245             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
246             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
247             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
248             break;
249
250         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
251             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
252             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
253             info->tex_type = tex_cube;
254             cube_coords_float(rect, w, h, &f);
255
256             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
257             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
258             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
259             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
260             break;
261
262         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
263             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
264             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
265             info->tex_type = tex_cube;
266             cube_coords_float(rect, w, h, &f);
267
268             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
269             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
270             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
271             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
272             break;
273
274         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
275             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
276             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
277             info->tex_type = tex_cube;
278             cube_coords_float(rect, w, h, &f);
279
280             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
281             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
282             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
283             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
284             break;
285
286         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
287             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
288             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
289             info->tex_type = tex_cube;
290             cube_coords_float(rect, w, h, &f);
291
292             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
293             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
294             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
295             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
296             break;
297     }
298 }
299
300 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
301 {
302     if (rect_in)
303         *rect_out = *rect_in;
304     else
305     {
306         rect_out->left = 0;
307         rect_out->top = 0;
308         rect_out->right = surface->resource.width;
309         rect_out->bottom = surface->resource.height;
310     }
311 }
312
313 /* Context activation is done by the caller. */
314 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
315         const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
316 {
317     const struct wined3d_gl_info *gl_info = context->gl_info;
318     struct blt_info info;
319
320     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
321
322     gl_info->gl_ops.gl.p_glEnable(info.bind_target);
323     checkGLcall("glEnable(bind_target)");
324
325     context_bind_texture(context, info.bind_target, src_surface->texture_name);
326
327     /* Filtering for StretchRect */
328     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
329             wined3d_gl_mag_filter(magLookup, filter));
330     checkGLcall("glTexParameteri");
331     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
332             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
333     checkGLcall("glTexParameteri");
334     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
335     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
336     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
337         gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
338     gl_info->gl_ops.gl.p_glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
339     checkGLcall("glTexEnvi");
340
341     /* Draw a quad */
342     gl_info->gl_ops.gl.p_glBegin(GL_TRIANGLE_STRIP);
343     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[0]);
344     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->left, dst_rect->top);
345
346     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[1]);
347     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->right, dst_rect->top);
348
349     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[2]);
350     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->left, dst_rect->bottom);
351
352     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[3]);
353     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->right, dst_rect->bottom);
354     gl_info->gl_ops.gl.p_glEnd();
355
356     /* Unbind the texture */
357     context_bind_texture(context, info.bind_target, 0);
358
359     /* We changed the filtering settings on the texture. Inform the
360      * container about this to get the filters reset properly next draw. */
361     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
362     {
363         struct wined3d_texture *texture = src_surface->container.u.texture;
364         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
365         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
366         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
367         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
368     }
369 }
370
371 /* Works correctly only for <= 4 bpp formats. */
372 static void get_color_masks(const struct wined3d_format *format, DWORD *masks)
373 {
374     masks[0] = ((1 << format->red_size) - 1) << format->red_offset;
375     masks[1] = ((1 << format->green_size) - 1) << format->green_offset;
376     masks[2] = ((1 << format->blue_size) - 1) << format->blue_offset;
377 }
378
379 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
380 {
381     const struct wined3d_format *format = surface->resource.format;
382     SYSTEM_INFO sysInfo;
383     BITMAPINFO *b_info;
384     int extraline = 0;
385     DWORD *masks;
386
387     TRACE("surface %p.\n", surface);
388
389     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
390     {
391         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
392         return WINED3DERR_INVALIDCALL;
393     }
394
395     switch (format->byte_count)
396     {
397         case 2:
398         case 4:
399             /* Allocate extra space to store the RGB bit masks. */
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
401             break;
402
403         case 3:
404             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
405             break;
406
407         default:
408             /* Allocate extra space for a palette. */
409             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
410                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
411             break;
412     }
413
414     if (!b_info)
415         return E_OUTOFMEMORY;
416
417     /* Some applications access the surface in via DWORDs, and do not take
418      * the necessary care at the end of the surface. So we need at least
419      * 4 extra bytes at the end of the surface. Check against the page size,
420      * if the last page used for the surface has at least 4 spare bytes we're
421      * safe, otherwise add an extra line to the DIB section. */
422     GetSystemInfo(&sysInfo);
423     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
424     {
425         extraline = 1;
426         TRACE("Adding an extra line to the DIB section.\n");
427     }
428
429     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
430     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
431     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
432     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
433     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
434             * wined3d_surface_get_pitch(surface);
435     b_info->bmiHeader.biPlanes = 1;
436     b_info->bmiHeader.biBitCount = format->byte_count * 8;
437
438     b_info->bmiHeader.biXPelsPerMeter = 0;
439     b_info->bmiHeader.biYPelsPerMeter = 0;
440     b_info->bmiHeader.biClrUsed = 0;
441     b_info->bmiHeader.biClrImportant = 0;
442
443     /* Get the bit masks */
444     masks = (DWORD *)b_info->bmiColors;
445     switch (surface->resource.format->id)
446     {
447         case WINED3DFMT_B8G8R8_UNORM:
448             b_info->bmiHeader.biCompression = BI_RGB;
449             break;
450
451         case WINED3DFMT_B5G5R5X1_UNORM:
452         case WINED3DFMT_B5G5R5A1_UNORM:
453         case WINED3DFMT_B4G4R4A4_UNORM:
454         case WINED3DFMT_B4G4R4X4_UNORM:
455         case WINED3DFMT_B2G3R3_UNORM:
456         case WINED3DFMT_B2G3R3A8_UNORM:
457         case WINED3DFMT_R10G10B10A2_UNORM:
458         case WINED3DFMT_R8G8B8A8_UNORM:
459         case WINED3DFMT_R8G8B8X8_UNORM:
460         case WINED3DFMT_B10G10R10A2_UNORM:
461         case WINED3DFMT_B5G6R5_UNORM:
462         case WINED3DFMT_R16G16B16A16_UNORM:
463             b_info->bmiHeader.biCompression = BI_BITFIELDS;
464             get_color_masks(format, masks);
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             break;
471     }
472
473     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
474             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
475             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
476     surface->dib.DIBsection = CreateDIBSection(0, b_info, DIB_RGB_COLORS, &surface->dib.bitmap_data, 0, 0);
477
478     if (!surface->dib.DIBsection)
479     {
480         ERR("Failed to create DIB section.\n");
481         HeapFree(GetProcessHeap(), 0, b_info);
482         return HRESULT_FROM_WIN32(GetLastError());
483     }
484
485     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
486     /* Copy the existing surface to the dib section. */
487     if (surface->resource.allocatedMemory)
488     {
489         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
490                 surface->resource.height * wined3d_surface_get_pitch(surface));
491     }
492     else
493     {
494         /* This is to make maps read the GL texture although memory is allocated. */
495         surface->flags &= ~SFLAG_INSYSMEM;
496     }
497     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
498
499     HeapFree(GetProcessHeap(), 0, b_info);
500
501     /* Now allocate a DC. */
502     surface->hDC = CreateCompatibleDC(0);
503     SelectObject(surface->hDC, surface->dib.DIBsection);
504     TRACE("Using wined3d palette %p.\n", surface->palette);
505     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
506
507     surface->flags |= SFLAG_DIBSECTION;
508
509     return WINED3D_OK;
510 }
511
512 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
513 {
514     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
515         return FALSE;
516     if (!(surface->flags & SFLAG_DYNLOCK))
517         return FALSE;
518     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
519         return FALSE;
520     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
521         return FALSE;
522
523     return TRUE;
524 }
525
526 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
527 {
528     struct wined3d_context *context;
529     GLenum error;
530
531     context = context_acquire(surface->resource.device, NULL);
532
533     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
534     error = gl_info->gl_ops.gl.p_glGetError();
535     if (!surface->pbo || error != GL_NO_ERROR)
536         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
537
538     TRACE("Binding PBO %u.\n", surface->pbo);
539
540     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
541     checkGLcall("glBindBufferARB");
542
543     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
544             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
545     checkGLcall("glBufferDataARB");
546
547     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
548     checkGLcall("glBindBufferARB");
549
550     /* We don't need the system memory anymore and we can't even use it for PBOs. */
551     if (!(surface->flags & SFLAG_CLIENT))
552     {
553         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
554         surface->resource.heapMemory = NULL;
555     }
556     surface->resource.allocatedMemory = NULL;
557     surface->flags |= SFLAG_PBO;
558     context_release(context);
559 }
560
561 static void surface_prepare_system_memory(struct wined3d_surface *surface)
562 {
563     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
564
565     TRACE("surface %p.\n", surface);
566
567     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
568         surface_load_pbo(surface, gl_info);
569     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
570     {
571         /* Whatever surface we have, make sure that there is memory allocated
572          * for the downloaded copy, or a PBO to map. */
573         if (!surface->resource.heapMemory)
574             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
575
576         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
577                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
578
579         if (surface->flags & SFLAG_INSYSMEM)
580             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
581     }
582 }
583
584 static void surface_evict_sysmem(struct wined3d_surface *surface)
585 {
586     if (surface->resource.map_count || (surface->flags & SFLAG_DONOTFREE))
587         return;
588
589     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
590     surface->resource.allocatedMemory = NULL;
591     surface->resource.heapMemory = NULL;
592     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
593 }
594
595 /* Context activation is done by the caller. */
596 static void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
597 {
598     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
599
600     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
601     {
602         struct wined3d_texture *texture = surface->container.u.texture;
603
604         TRACE("Passing to container (%p).\n", texture);
605         texture->texture_ops->texture_bind(texture, context, srgb);
606     }
607     else
608     {
609         const struct wined3d_gl_info *gl_info = context->gl_info;
610
611         if (surface->texture_level)
612         {
613             ERR("Standalone surface %p is non-zero texture level %u.\n",
614                     surface, surface->texture_level);
615         }
616
617         if (srgb)
618             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
619
620         if (!surface->texture_name)
621         {
622             gl_info->gl_ops.gl.p_glGenTextures(1, &surface->texture_name);
623             checkGLcall("glGenTextures");
624
625             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
626
627             context_bind_texture(context, surface->texture_target, surface->texture_name);
628             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
629             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
630             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
631             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
632             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
633             checkGLcall("glTexParameteri");
634         }
635         else
636         {
637             context_bind_texture(context, surface->texture_target, surface->texture_name);
638         }
639     }
640 }
641
642 /* Context activation is done by the caller. */
643 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
644         struct wined3d_context *context, BOOL srgb)
645 {
646     struct wined3d_device *device = surface->resource.device;
647     DWORD active_sampler;
648
649     /* We don't need a specific texture unit, but after binding the texture
650      * the current unit is dirty. Read the unit back instead of switching to
651      * 0, this avoids messing around with the state manager's GL states. The
652      * current texture unit should always be a valid one.
653      *
654      * To be more specific, this is tricky because we can implicitly be
655      * called from sampler() in state.c. This means we can't touch anything
656      * other than whatever happens to be the currently active texture, or we
657      * would risk marking already applied sampler states dirty again. */
658     active_sampler = device->rev_tex_unit_map[context->active_texture];
659
660     if (active_sampler != WINED3D_UNMAPPED_STAGE)
661         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
662     surface_bind(surface, context, srgb);
663 }
664
665 static void surface_force_reload(struct wined3d_surface *surface)
666 {
667     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
668 }
669
670 static void surface_release_client_storage(struct wined3d_surface *surface)
671 {
672     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
673     const struct wined3d_gl_info *gl_info = context->gl_info;
674
675     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
676     if (surface->texture_name)
677     {
678         surface_bind_and_dirtify(surface, context, FALSE);
679         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
680                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
681     }
682     if (surface->texture_name_srgb)
683     {
684         surface_bind_and_dirtify(surface, context, TRUE);
685         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
686                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
687     }
688     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
689
690     context_release(context);
691
692     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
693     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
694     surface_force_reload(surface);
695 }
696
697 static HRESULT surface_private_setup(struct wined3d_surface *surface)
698 {
699     /* TODO: Check against the maximum texture sizes supported by the video card. */
700     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
701     unsigned int pow2Width, pow2Height;
702
703     TRACE("surface %p.\n", surface);
704
705     surface->texture_name = 0;
706     surface->texture_target = GL_TEXTURE_2D;
707
708     /* Non-power2 support */
709     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
710     {
711         pow2Width = surface->resource.width;
712         pow2Height = surface->resource.height;
713     }
714     else
715     {
716         /* Find the nearest pow2 match */
717         pow2Width = pow2Height = 1;
718         while (pow2Width < surface->resource.width)
719             pow2Width <<= 1;
720         while (pow2Height < surface->resource.height)
721             pow2Height <<= 1;
722     }
723     surface->pow2Width = pow2Width;
724     surface->pow2Height = pow2Height;
725
726     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
727     {
728         /* TODO: Add support for non power two compressed textures. */
729         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
730         {
731             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
732                   surface, surface->resource.width, surface->resource.height);
733             return WINED3DERR_NOTAVAILABLE;
734         }
735     }
736
737     if (pow2Width != surface->resource.width
738             || pow2Height != surface->resource.height)
739     {
740         surface->flags |= SFLAG_NONPOW2;
741     }
742
743     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
744             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
745     {
746         /* One of three options:
747          * 1: Do the same as we do with NPOT and scale the texture, (any
748          *    texture ops would require the texture to be scaled which is
749          *    potentially slow)
750          * 2: Set the texture to the maximum size (bad idea).
751          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
752          * 4: Create the surface, but allow it to be used only for DirectDraw
753          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
754          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
755          *    the render target. */
756         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
757         {
758             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
759             return WINED3DERR_NOTAVAILABLE;
760         }
761
762         /* We should never use this surface in combination with OpenGL! */
763         TRACE("Creating an oversized surface: %ux%u.\n",
764                 surface->pow2Width, surface->pow2Height);
765     }
766     else
767     {
768         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
769          * and EXT_PALETTED_TEXTURE is used in combination with texture
770          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
771          * EXT_PALETTED_TEXTURE doesn't work in combination with
772          * ARB_TEXTURE_RECTANGLE. */
773         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
774                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
775                 && gl_info->supported[EXT_PALETTED_TEXTURE]
776                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
777         {
778             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
779             surface->pow2Width = surface->resource.width;
780             surface->pow2Height = surface->resource.height;
781             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
782         }
783     }
784
785     switch (wined3d_settings.offscreen_rendering_mode)
786     {
787         case ORM_FBO:
788             surface->get_drawable_size = get_drawable_size_fbo;
789             break;
790
791         case ORM_BACKBUFFER:
792             surface->get_drawable_size = get_drawable_size_backbuffer;
793             break;
794
795         default:
796             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
797             return WINED3DERR_INVALIDCALL;
798     }
799
800     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
801         surface->flags |= SFLAG_DISCARDED;
802
803     return WINED3D_OK;
804 }
805
806 static void surface_realize_palette(struct wined3d_surface *surface)
807 {
808     struct wined3d_palette *palette = surface->palette;
809
810     TRACE("surface %p.\n", surface);
811
812     if (!palette) return;
813
814     if (surface->resource.format->id == WINED3DFMT_P8_UINT
815             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
816     {
817         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
818         {
819             /* Make sure the texture is up to date. This call doesn't do
820              * anything if the texture is already up to date. */
821             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
822
823             /* We want to force a palette refresh, so mark the drawable as not being up to date */
824             if (!surface_is_offscreen(surface))
825                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
826         }
827         else
828         {
829             if (!(surface->flags & SFLAG_INSYSMEM))
830             {
831                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
832                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
833             }
834             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
835         }
836     }
837
838     if (surface->flags & SFLAG_DIBSECTION)
839     {
840         RGBQUAD col[256];
841         unsigned int i;
842
843         TRACE("Updating the DC's palette.\n");
844
845         for (i = 0; i < 256; ++i)
846         {
847             col[i].rgbRed   = palette->palents[i].peRed;
848             col[i].rgbGreen = palette->palents[i].peGreen;
849             col[i].rgbBlue  = palette->palents[i].peBlue;
850             col[i].rgbReserved = 0;
851         }
852         SetDIBColorTable(surface->hDC, 0, 256, col);
853     }
854
855     /* Propagate the changes to the drawable when we have a palette. */
856     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
857         surface_load_location(surface, surface->draw_binding, NULL);
858 }
859
860 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
861 {
862     HRESULT hr;
863
864     /* If there's no destination surface there is nothing to do. */
865     if (!surface->overlay_dest)
866         return WINED3D_OK;
867
868     /* Blt calls ModifyLocation on the dest surface, which in turn calls
869      * DrawOverlay to update the overlay. Prevent an endless recursion. */
870     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
871         return WINED3D_OK;
872
873     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
874     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
875             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
876     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
877
878     return hr;
879 }
880
881 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
882 {
883     struct wined3d_device *device = surface->resource.device;
884     const RECT *pass_rect = rect;
885
886     TRACE("surface %p, rect %s, flags %#x.\n",
887             surface, wine_dbgstr_rect(rect), flags);
888
889     if (flags & WINED3D_MAP_DISCARD)
890     {
891         TRACE("WINED3D_MAP_DISCARD flag passed, marking SYSMEM as up to date.\n");
892         surface_prepare_system_memory(surface);
893         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
894     }
895     else
896     {
897         if (surface->resource.usage & WINED3DUSAGE_DYNAMIC)
898             WARN_(d3d_perf)("Mapping a dynamic surface without WINED3D_MAP_DISCARD.\n");
899
900         /* surface_load_location() does not check if the rectangle specifies
901          * the full surface. Most callers don't need that, so do it here. */
902         if (rect && !rect->top && !rect->left
903                 && rect->right == surface->resource.width
904                 && rect->bottom == surface->resource.height)
905             pass_rect = NULL;
906         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
907     }
908
909     if (surface->flags & SFLAG_PBO)
910     {
911         const struct wined3d_gl_info *gl_info;
912         struct wined3d_context *context;
913
914         context = context_acquire(device, NULL);
915         gl_info = context->gl_info;
916
917         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
918         checkGLcall("glBindBufferARB");
919
920         /* This shouldn't happen but could occur if some other function
921          * didn't handle the PBO properly. */
922         if (surface->resource.allocatedMemory)
923             ERR("The surface already has PBO memory allocated.\n");
924
925         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
926         checkGLcall("glMapBufferARB");
927
928         /* Make sure the PBO isn't set anymore in order not to break non-PBO
929          * calls. */
930         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
931         checkGLcall("glBindBufferARB");
932
933         context_release(context);
934     }
935
936     if (!(flags & (WINED3D_MAP_NO_DIRTY_UPDATE | WINED3D_MAP_READONLY)))
937     {
938         if (!rect)
939             surface_add_dirty_rect(surface, NULL);
940         else
941         {
942             struct wined3d_box b;
943
944             b.left = rect->left;
945             b.top = rect->top;
946             b.right = rect->right;
947             b.bottom = rect->bottom;
948             b.front = 0;
949             b.back = 1;
950             surface_add_dirty_rect(surface, &b);
951         }
952     }
953 }
954
955 static void surface_unmap(struct wined3d_surface *surface)
956 {
957     struct wined3d_device *device = surface->resource.device;
958     BOOL fullsurface;
959
960     TRACE("surface %p.\n", surface);
961
962     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
963
964     if (surface->flags & SFLAG_PBO)
965     {
966         const struct wined3d_gl_info *gl_info;
967         struct wined3d_context *context;
968
969         TRACE("Freeing PBO memory.\n");
970
971         context = context_acquire(device, NULL);
972         gl_info = context->gl_info;
973
974         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
975         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
976         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
977         checkGLcall("glUnmapBufferARB");
978         context_release(context);
979
980         surface->resource.allocatedMemory = NULL;
981     }
982
983     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
984
985     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
986     {
987         TRACE("Not dirtified, nothing to do.\n");
988         goto done;
989     }
990
991     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
992             && surface->container.u.swapchain->front_buffer == surface)
993     {
994         if (!surface->dirtyRect.left && !surface->dirtyRect.top
995                 && surface->dirtyRect.right == surface->resource.width
996                 && surface->dirtyRect.bottom == surface->resource.height)
997         {
998             fullsurface = TRUE;
999         }
1000         else
1001         {
1002             /* TODO: Proper partial rectangle tracking. */
1003             fullsurface = FALSE;
1004             surface->flags |= SFLAG_INSYSMEM;
1005         }
1006
1007         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
1008
1009         /* Partial rectangle tracking is not commonly implemented, it is only
1010          * done for render targets. INSYSMEM was set before to tell
1011          * surface_load_location() where to read the rectangle from.
1012          * Indrawable is set because all modifications from the partial
1013          * sysmem copy are written back to the drawable, thus the surface is
1014          * merged again in the drawable. The sysmem copy is not fully up to
1015          * date because only a subrectangle was read in Map(). */
1016         if (!fullsurface)
1017         {
1018             surface_modify_location(surface, surface->draw_binding, TRUE);
1019             surface_evict_sysmem(surface);
1020         }
1021
1022         surface->dirtyRect.left = surface->resource.width;
1023         surface->dirtyRect.top = surface->resource.height;
1024         surface->dirtyRect.right = 0;
1025         surface->dirtyRect.bottom = 0;
1026     }
1027     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1028     {
1029         FIXME("Depth / stencil buffer locking is not implemented.\n");
1030     }
1031
1032 done:
1033     /* Overlays have to be redrawn manually after changes with the GL implementation */
1034     if (surface->overlay_dest)
1035         surface_draw_overlay(surface);
1036 }
1037
1038 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1039 {
1040     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1041         return FALSE;
1042     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1043         return FALSE;
1044     return TRUE;
1045 }
1046
1047 static void surface_depth_blt_fbo(const struct wined3d_device *device,
1048         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect,
1049         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect)
1050 {
1051     const struct wined3d_gl_info *gl_info;
1052     struct wined3d_context *context;
1053     DWORD src_mask, dst_mask;
1054     GLbitfield gl_mask;
1055
1056     TRACE("device %p\n", device);
1057     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1058             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect));
1059     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1060             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect));
1061
1062     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1063     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1064
1065     if (src_mask != dst_mask)
1066     {
1067         ERR("Incompatible formats %s and %s.\n",
1068                 debug_d3dformat(src_surface->resource.format->id),
1069                 debug_d3dformat(dst_surface->resource.format->id));
1070         return;
1071     }
1072
1073     if (!src_mask)
1074     {
1075         ERR("Not a depth / stencil format: %s.\n",
1076                 debug_d3dformat(src_surface->resource.format->id));
1077         return;
1078     }
1079
1080     gl_mask = 0;
1081     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1082         gl_mask |= GL_DEPTH_BUFFER_BIT;
1083     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1084         gl_mask |= GL_STENCIL_BUFFER_BIT;
1085
1086     /* Make sure the locations are up-to-date. Loading the destination
1087      * surface isn't required if the entire surface is overwritten. */
1088     surface_load_location(src_surface, src_location, NULL);
1089     if (!surface_is_full_rect(dst_surface, dst_rect))
1090         surface_load_location(dst_surface, dst_location, NULL);
1091
1092     context = context_acquire(device, NULL);
1093     if (!context->valid)
1094     {
1095         context_release(context);
1096         WARN("Invalid context, skipping blit.\n");
1097         return;
1098     }
1099
1100     gl_info = context->gl_info;
1101
1102     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, src_location);
1103     gl_info->gl_ops.gl.p_glReadBuffer(GL_NONE);
1104     checkGLcall("glReadBuffer()");
1105     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1106
1107     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, dst_location);
1108     context_set_draw_buffer(context, GL_NONE);
1109     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1110     context_invalidate_state(context, STATE_FRAMEBUFFER);
1111
1112     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1113     {
1114         gl_info->gl_ops.gl.p_glDepthMask(GL_TRUE);
1115         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1116     }
1117     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1118     {
1119         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1120         {
1121             gl_info->gl_ops.gl.p_glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1122             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1123         }
1124         gl_info->gl_ops.gl.p_glStencilMask(~0U);
1125         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1126     }
1127
1128     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
1129     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1130
1131     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1132             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1133     checkGLcall("glBlitFramebuffer()");
1134
1135     if (wined3d_settings.strict_draw_ordering)
1136         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
1137
1138     context_release(context);
1139 }
1140
1141 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1142  * Depth / stencil is not supported. */
1143 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1144         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1145         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1146 {
1147     const struct wined3d_gl_info *gl_info;
1148     struct wined3d_context *context;
1149     RECT src_rect, dst_rect;
1150     GLenum gl_filter;
1151     GLenum buffer;
1152
1153     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1154     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1155             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1156     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1157             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1158
1159     src_rect = *src_rect_in;
1160     dst_rect = *dst_rect_in;
1161
1162     switch (filter)
1163     {
1164         case WINED3D_TEXF_LINEAR:
1165             gl_filter = GL_LINEAR;
1166             break;
1167
1168         default:
1169             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1170         case WINED3D_TEXF_NONE:
1171         case WINED3D_TEXF_POINT:
1172             gl_filter = GL_NEAREST;
1173             break;
1174     }
1175
1176     /* Resolve the source surface first if needed. */
1177     if (src_location == SFLAG_INRB_MULTISAMPLE
1178             && (src_surface->resource.format->id != dst_surface->resource.format->id
1179                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1180                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1181         src_location = SFLAG_INRB_RESOLVED;
1182
1183     /* Make sure the locations are up-to-date. Loading the destination
1184      * surface isn't required if the entire surface is overwritten. (And is
1185      * in fact harmful if we're being called by surface_load_location() with
1186      * the purpose of loading the destination surface.) */
1187     surface_load_location(src_surface, src_location, NULL);
1188     if (!surface_is_full_rect(dst_surface, &dst_rect))
1189         surface_load_location(dst_surface, dst_location, NULL);
1190
1191     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1192     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1193     else context = context_acquire(device, NULL);
1194
1195     if (!context->valid)
1196     {
1197         context_release(context);
1198         WARN("Invalid context, skipping blit.\n");
1199         return;
1200     }
1201
1202     gl_info = context->gl_info;
1203
1204     if (src_location == SFLAG_INDRAWABLE)
1205     {
1206         TRACE("Source surface %p is onscreen.\n", src_surface);
1207         buffer = surface_get_gl_buffer(src_surface);
1208         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1209     }
1210     else
1211     {
1212         TRACE("Source surface %p is offscreen.\n", src_surface);
1213         buffer = GL_COLOR_ATTACHMENT0;
1214     }
1215
1216     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1217     gl_info->gl_ops.gl.p_glReadBuffer(buffer);
1218     checkGLcall("glReadBuffer()");
1219     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1220
1221     if (dst_location == SFLAG_INDRAWABLE)
1222     {
1223         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1224         buffer = surface_get_gl_buffer(dst_surface);
1225         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1226     }
1227     else
1228     {
1229         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1230         buffer = GL_COLOR_ATTACHMENT0;
1231     }
1232
1233     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1234     context_set_draw_buffer(context, buffer);
1235     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1236     context_invalidate_state(context, STATE_FRAMEBUFFER);
1237
1238     gl_info->gl_ops.gl.p_glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1239     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1240     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1241     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1242     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1243
1244     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
1245     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1246
1247     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1248             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1249     checkGLcall("glBlitFramebuffer()");
1250
1251     if (wined3d_settings.strict_draw_ordering
1252             || (dst_location == SFLAG_INDRAWABLE
1253             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1254         gl_info->gl_ops.gl.p_glFlush();
1255
1256     context_release(context);
1257 }
1258
1259 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1260         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1261         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1262 {
1263     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1264         return FALSE;
1265
1266     /* Source and/or destination need to be on the GL side */
1267     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1268         return FALSE;
1269
1270     switch (blit_op)
1271     {
1272         case WINED3D_BLIT_OP_COLOR_BLIT:
1273             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1274                 return FALSE;
1275             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1276                 return FALSE;
1277             break;
1278
1279         case WINED3D_BLIT_OP_DEPTH_BLIT:
1280             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1281                 return FALSE;
1282             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1283                 return FALSE;
1284             break;
1285
1286         default:
1287             return FALSE;
1288     }
1289
1290     if (!(src_format->id == dst_format->id
1291             || (is_identity_fixup(src_format->color_fixup)
1292             && is_identity_fixup(dst_format->color_fixup))))
1293         return FALSE;
1294
1295     return TRUE;
1296 }
1297
1298 /* This function checks if the primary render target uses the 8bit paletted format. */
1299 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1300 {
1301     if (device->fb.render_targets && device->fb.render_targets[0])
1302     {
1303         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1304         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1305                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1306             return TRUE;
1307     }
1308     return FALSE;
1309 }
1310
1311 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1312         DWORD color, struct wined3d_color *float_color)
1313 {
1314     const struct wined3d_format *format = surface->resource.format;
1315     const struct wined3d_device *device = surface->resource.device;
1316
1317     switch (format->id)
1318     {
1319         case WINED3DFMT_P8_UINT:
1320             if (surface->palette)
1321             {
1322                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1323                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1324                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1325             }
1326             else
1327             {
1328                 float_color->r = 0.0f;
1329                 float_color->g = 0.0f;
1330                 float_color->b = 0.0f;
1331             }
1332             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1333             break;
1334
1335         case WINED3DFMT_B5G6R5_UNORM:
1336             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1337             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1338             float_color->b = (color & 0x1f) / 31.0f;
1339             float_color->a = 1.0f;
1340             break;
1341
1342         case WINED3DFMT_B8G8R8_UNORM:
1343         case WINED3DFMT_B8G8R8X8_UNORM:
1344             float_color->r = D3DCOLOR_R(color);
1345             float_color->g = D3DCOLOR_G(color);
1346             float_color->b = D3DCOLOR_B(color);
1347             float_color->a = 1.0f;
1348             break;
1349
1350         case WINED3DFMT_B8G8R8A8_UNORM:
1351             float_color->r = D3DCOLOR_R(color);
1352             float_color->g = D3DCOLOR_G(color);
1353             float_color->b = D3DCOLOR_B(color);
1354             float_color->a = D3DCOLOR_A(color);
1355             break;
1356
1357         default:
1358             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1359             return FALSE;
1360     }
1361
1362     return TRUE;
1363 }
1364
1365 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1366 {
1367     const struct wined3d_format *format = surface->resource.format;
1368
1369     switch (format->id)
1370     {
1371         case WINED3DFMT_S1_UINT_D15_UNORM:
1372             *float_depth = depth / (float)0x00007fff;
1373             break;
1374
1375         case WINED3DFMT_D16_UNORM:
1376             *float_depth = depth / (float)0x0000ffff;
1377             break;
1378
1379         case WINED3DFMT_D24_UNORM_S8_UINT:
1380         case WINED3DFMT_X8D24_UNORM:
1381             *float_depth = depth / (float)0x00ffffff;
1382             break;
1383
1384         case WINED3DFMT_D32_UNORM:
1385             *float_depth = depth / (float)0xffffffff;
1386             break;
1387
1388         default:
1389             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1390             return FALSE;
1391     }
1392
1393     return TRUE;
1394 }
1395
1396 /* Do not call while under the GL lock. */
1397 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1398 {
1399     const struct wined3d_resource *resource = &surface->resource;
1400     struct wined3d_device *device = resource->device;
1401     const struct blit_shader *blitter;
1402
1403     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1404             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1405     if (!blitter)
1406     {
1407         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1408         return WINED3DERR_INVALIDCALL;
1409     }
1410
1411     return blitter->depth_fill(device, surface, rect, depth);
1412 }
1413
1414 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect,
1415         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect)
1416 {
1417     struct wined3d_device *device = src_surface->resource.device;
1418
1419     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1420             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1421             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1422         return WINED3DERR_INVALIDCALL;
1423
1424     surface_depth_blt_fbo(device, src_surface, src_location, src_rect, dst_surface, dst_location, dst_rect);
1425
1426     surface_modify_ds_location(dst_surface, dst_location,
1427             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1428
1429     return WINED3D_OK;
1430 }
1431
1432 /* Do not call while under the GL lock. */
1433 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1434         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1435         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1436 {
1437     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1438     struct wined3d_device *device = dst_surface->resource.device;
1439     DWORD src_ds_flags, dst_ds_flags;
1440     RECT src_rect, dst_rect;
1441     BOOL scale, convert;
1442
1443     static const DWORD simple_blit = WINEDDBLT_ASYNC
1444             | WINEDDBLT_COLORFILL
1445             | WINEDDBLT_WAIT
1446             | WINEDDBLT_DEPTHFILL
1447             | WINEDDBLT_DONOTWAIT;
1448
1449     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1450             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1451             flags, fx, debug_d3dtexturefiltertype(filter));
1452     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1453
1454     if (fx)
1455     {
1456         TRACE("dwSize %#x.\n", fx->dwSize);
1457         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1458         TRACE("dwROP %#x.\n", fx->dwROP);
1459         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1460         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1461         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1462         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1463         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1464         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1465         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1466         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1467         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1468         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1469         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1470         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1471         TRACE("dwReserved %#x.\n", fx->dwReserved);
1472         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1473         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1474         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1475         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1476         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1477         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1478                 fx->ddckDestColorkey.color_space_low_value,
1479                 fx->ddckDestColorkey.color_space_high_value);
1480         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1481                 fx->ddckSrcColorkey.color_space_low_value,
1482                 fx->ddckSrcColorkey.color_space_high_value);
1483     }
1484
1485     if (dst_surface->resource.map_count || (src_surface && src_surface->resource.map_count))
1486     {
1487         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1488         return WINEDDERR_SURFACEBUSY;
1489     }
1490
1491     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1492
1493     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1494             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1495             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1496             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1497             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1498     {
1499         WARN("The application gave us a bad destination rectangle.\n");
1500         return WINEDDERR_INVALIDRECT;
1501     }
1502
1503     if (src_surface)
1504     {
1505         surface_get_rect(src_surface, src_rect_in, &src_rect);
1506
1507         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1508                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1509                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1510                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1511                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1512         {
1513             WARN("Application gave us bad source rectangle for Blt.\n");
1514             return WINEDDERR_INVALIDRECT;
1515         }
1516     }
1517     else
1518     {
1519         memset(&src_rect, 0, sizeof(src_rect));
1520     }
1521
1522     if (!fx || !(fx->dwDDFX))
1523         flags &= ~WINEDDBLT_DDFX;
1524
1525     if (flags & WINEDDBLT_WAIT)
1526         flags &= ~WINEDDBLT_WAIT;
1527
1528     if (flags & WINEDDBLT_ASYNC)
1529     {
1530         static unsigned int once;
1531
1532         if (!once++)
1533             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1534         flags &= ~WINEDDBLT_ASYNC;
1535     }
1536
1537     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1538     if (flags & WINEDDBLT_DONOTWAIT)
1539     {
1540         static unsigned int once;
1541
1542         if (!once++)
1543             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1544         flags &= ~WINEDDBLT_DONOTWAIT;
1545     }
1546
1547     if (!device->d3d_initialized)
1548     {
1549         WARN("D3D not initialized, using fallback.\n");
1550         goto cpu;
1551     }
1552
1553     /* We want to avoid invalidating the sysmem location for converted
1554      * surfaces, since otherwise we'd have to convert the data back when
1555      * locking them. */
1556     if (dst_surface->flags & SFLAG_CONVERTED)
1557     {
1558         WARN_(d3d_perf)("Converted surface, using CPU blit.\n");
1559         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1560     }
1561
1562     if (flags & ~simple_blit)
1563     {
1564         WARN_(d3d_perf)("Using fallback for complex blit (%#x).\n", flags);
1565         goto fallback;
1566     }
1567
1568     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1569         src_swapchain = src_surface->container.u.swapchain;
1570     else
1571         src_swapchain = NULL;
1572
1573     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1574         dst_swapchain = dst_surface->container.u.swapchain;
1575     else
1576         dst_swapchain = NULL;
1577
1578     /* This isn't strictly needed. FBO blits for example could deal with
1579      * cross-swapchain blits by first downloading the source to a texture
1580      * before switching to the destination context. We just have this here to
1581      * not have to deal with the issue, since cross-swapchain blits should be
1582      * rare. */
1583     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1584     {
1585         FIXME("Using fallback for cross-swapchain blit.\n");
1586         goto fallback;
1587     }
1588
1589     scale = src_surface
1590             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1591             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1592     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1593
1594     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1595     if (src_surface)
1596         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1597     else
1598         src_ds_flags = 0;
1599
1600     if (src_ds_flags || dst_ds_flags)
1601     {
1602         if (flags & WINEDDBLT_DEPTHFILL)
1603         {
1604             float depth;
1605
1606             TRACE("Depth fill.\n");
1607
1608             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1609                 return WINED3DERR_INVALIDCALL;
1610
1611             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1612                 return WINED3D_OK;
1613         }
1614         else
1615         {
1616             if (src_ds_flags != dst_ds_flags)
1617             {
1618                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1619                 return WINED3DERR_INVALIDCALL;
1620             }
1621
1622             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, src_surface->draw_binding, &src_rect,
1623                     dst_surface, dst_surface->draw_binding, &dst_rect)))
1624                 return WINED3D_OK;
1625         }
1626     }
1627     else
1628     {
1629         /* In principle this would apply to depth blits as well, but we don't
1630          * implement those in the CPU blitter at the moment. */
1631         if ((dst_surface->flags & SFLAG_INSYSMEM)
1632                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1633         {
1634             if (scale)
1635                 TRACE("Not doing sysmem blit because of scaling.\n");
1636             else if (convert)
1637                 TRACE("Not doing sysmem blit because of format conversion.\n");
1638             else
1639                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1640         }
1641
1642         if (flags & WINEDDBLT_COLORFILL)
1643         {
1644             struct wined3d_color color;
1645
1646             TRACE("Color fill.\n");
1647
1648             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1649                 goto fallback;
1650
1651             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1652                 return WINED3D_OK;
1653         }
1654         else
1655         {
1656             TRACE("Color blit.\n");
1657
1658             /* Upload */
1659             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1660             {
1661                 if (scale)
1662                     TRACE("Not doing upload because of scaling.\n");
1663                 else if (convert)
1664                     TRACE("Not doing upload because of format conversion.\n");
1665                 else
1666                 {
1667                     POINT dst_point = {dst_rect.left, dst_rect.top};
1668
1669                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1670                     {
1671                         if (!surface_is_offscreen(dst_surface))
1672                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1673                         return WINED3D_OK;
1674                     }
1675                 }
1676             }
1677
1678             /* Use present for back -> front blits. The idea behind this is
1679              * that present is potentially faster than a blit, in particular
1680              * when FBO blits aren't available. Some ddraw applications like
1681              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1682              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1683              * applications can't blit directly to the frontbuffer. */
1684             if (dst_swapchain && dst_swapchain->back_buffers
1685                     && dst_surface == dst_swapchain->front_buffer
1686                     && src_surface == dst_swapchain->back_buffers[0])
1687             {
1688                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1689
1690                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1691
1692                 /* Set the swap effect to COPY, we don't want the backbuffer
1693                  * to become undefined. */
1694                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1695                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1696                 dst_swapchain->desc.swap_effect = swap_effect;
1697
1698                 return WINED3D_OK;
1699             }
1700
1701             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1702                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1703                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1704             {
1705                 TRACE("Using FBO blit.\n");
1706
1707                 surface_blt_fbo(device, filter,
1708                         src_surface, src_surface->draw_binding, &src_rect,
1709                         dst_surface, dst_surface->draw_binding, &dst_rect);
1710                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1711                 return WINED3D_OK;
1712             }
1713
1714             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1715                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1716                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1717             {
1718                 TRACE("Using arbfp blit.\n");
1719
1720                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1721                     return WINED3D_OK;
1722             }
1723         }
1724     }
1725
1726 fallback:
1727
1728     /* Special cases for render targets. */
1729     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1730             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1731     {
1732         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1733                 src_surface, &src_rect, flags, fx, filter)))
1734             return WINED3D_OK;
1735     }
1736
1737 cpu:
1738
1739     /* For the rest call the X11 surface implementation. For render targets
1740      * this should be implemented OpenGL accelerated in BltOverride, other
1741      * blits are rather rare. */
1742     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1743 }
1744
1745 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1746         struct wined3d_surface *render_target)
1747 {
1748     TRACE("surface %p, render_target %p.\n", surface, render_target);
1749
1750     /* TODO: Check surface sizes, pools, etc. */
1751
1752     if (render_target->resource.multisample_type)
1753         return WINED3DERR_INVALIDCALL;
1754
1755     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1756 }
1757
1758 /* Context activation is done by the caller. */
1759 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1760 {
1761     if (surface->flags & SFLAG_DIBSECTION)
1762     {
1763         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1764     }
1765     else
1766     {
1767         if (!surface->resource.heapMemory)
1768             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1769         else if (!(surface->flags & SFLAG_CLIENT))
1770             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1771                     surface, surface->resource.heapMemory, surface->flags);
1772
1773         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1774                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1775     }
1776
1777     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1778     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1779     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1780             surface->resource.size, surface->resource.allocatedMemory));
1781     checkGLcall("glGetBufferSubDataARB");
1782     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1783     checkGLcall("glDeleteBuffersARB");
1784
1785     surface->pbo = 0;
1786     surface->flags &= ~SFLAG_PBO;
1787 }
1788
1789 static BOOL surface_init_sysmem(struct wined3d_surface *surface)
1790 {
1791     if (!surface->resource.allocatedMemory)
1792     {
1793         if (!surface->resource.heapMemory)
1794         {
1795             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
1796                     surface->resource.size + RESOURCE_ALIGNMENT)))
1797             {
1798                 ERR("Failed to allocate memory.\n");
1799                 return FALSE;
1800             }
1801         }
1802         else if (!(surface->flags & SFLAG_CLIENT))
1803         {
1804             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1805                     surface, surface->resource.heapMemory, surface->flags);
1806         }
1807
1808         surface->resource.allocatedMemory =
1809             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1810     }
1811     else
1812     {
1813         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
1814     }
1815
1816     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1817
1818     return TRUE;
1819 }
1820
1821 /* Do not call while under the GL lock. */
1822 static void surface_unload(struct wined3d_resource *resource)
1823 {
1824     struct wined3d_surface *surface = surface_from_resource(resource);
1825     struct wined3d_renderbuffer_entry *entry, *entry2;
1826     struct wined3d_device *device = resource->device;
1827     const struct wined3d_gl_info *gl_info;
1828     struct wined3d_context *context;
1829
1830     TRACE("surface %p.\n", surface);
1831
1832     if (resource->pool == WINED3D_POOL_DEFAULT)
1833     {
1834         /* Default pool resources are supposed to be destroyed before Reset is called.
1835          * Implicit resources stay however. So this means we have an implicit render target
1836          * or depth stencil. The content may be destroyed, but we still have to tear down
1837          * opengl resources, so we cannot leave early.
1838          *
1839          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1840          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1841          * or the depth stencil into an FBO the texture or render buffer will be removed
1842          * and all flags get lost
1843          */
1844         if (!(surface->flags & SFLAG_PBO))
1845             surface_init_sysmem(surface);
1846         /* We also get here when the ddraw swapchain is destroyed, for example
1847          * for a mode switch. In this case this surface won't necessarily be
1848          * an implicit surface. We have to mark it lost so that the
1849          * application can restore it after the mode switch. */
1850         surface->flags |= SFLAG_LOST;
1851     }
1852     else
1853     {
1854         /* Load the surface into system memory */
1855         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1856         surface_modify_location(surface, surface->draw_binding, FALSE);
1857     }
1858     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1859     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1860     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1861
1862     context = context_acquire(device, NULL);
1863     gl_info = context->gl_info;
1864
1865     /* Destroy PBOs, but load them into real sysmem before */
1866     if (surface->flags & SFLAG_PBO)
1867         surface_remove_pbo(surface, gl_info);
1868
1869     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1870      * all application-created targets the application has to release the surface
1871      * before calling _Reset
1872      */
1873     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1874     {
1875         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1876         list_remove(&entry->entry);
1877         HeapFree(GetProcessHeap(), 0, entry);
1878     }
1879     list_init(&surface->renderbuffers);
1880     surface->current_renderbuffer = NULL;
1881
1882     /* If we're in a texture, the texture name belongs to the texture.
1883      * Otherwise, destroy it. */
1884     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1885     {
1886         gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name);
1887         surface->texture_name = 0;
1888         gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name_srgb);
1889         surface->texture_name_srgb = 0;
1890     }
1891     if (surface->rb_multisample)
1892     {
1893         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1894         surface->rb_multisample = 0;
1895     }
1896     if (surface->rb_resolved)
1897     {
1898         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1899         surface->rb_resolved = 0;
1900     }
1901
1902     context_release(context);
1903
1904     resource_unload(resource);
1905 }
1906
/* Resource operations table for surfaces. Its single entry is the unload
 * handler, surface_unload(). */
static const struct wined3d_resource_ops surface_resource_ops =
{
    surface_unload,
};
1911
/* Surface operations table built from the surface_*() handlers. The
 * initializer is positional, in the same order as gdi_surface_ops: private
 * setup, realize palette, map, unmap. */
static const struct wined3d_surface_ops surface_ops =
{
    surface_private_setup,
    surface_realize_palette,
    surface_map,
    surface_unmap,
};
1919
1920 /*****************************************************************************
1921  * Initializes the GDI surface, aka creates the DIB section we render to
1922  * The DIB section creation is done by calling GetDC, which will create the
1923  * section and releasing the dc to allow the app to use it. The dib section
1924  * will stay until the surface is released
1925  *
1926  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1927  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1928  * avoid confusion in the shared surface code.
1929  *
1930  * Returns:
1931  *  WINED3D_OK on success
1932  *  The return values of called methods on failure
1933  *
1934  *****************************************************************************/
1935 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1936 {
1937     HRESULT hr;
1938
1939     TRACE("surface %p.\n", surface);
1940
1941     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1942     {
1943         ERR("Overlays not yet supported by GDI surfaces.\n");
1944         return WINED3DERR_INVALIDCALL;
1945     }
1946
1947     /* Sysmem textures have memory already allocated - release it,
1948      * this avoids an unnecessary memcpy. */
1949     hr = surface_create_dib_section(surface);
1950     if (SUCCEEDED(hr))
1951     {
1952         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1953         surface->resource.heapMemory = NULL;
1954         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1955     }
1956
1957     /* We don't mind the nonpow2 stuff in GDI. */
1958     surface->pow2Width = surface->resource.width;
1959     surface->pow2Height = surface->resource.height;
1960
1961     return WINED3D_OK;
1962 }
1963
1964 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1965 {
1966     struct wined3d_palette *palette = surface->palette;
1967
1968     TRACE("surface %p.\n", surface);
1969
1970     if (!palette) return;
1971
1972     if (surface->flags & SFLAG_DIBSECTION)
1973     {
1974         RGBQUAD col[256];
1975         unsigned int i;
1976
1977         TRACE("Updating the DC's palette.\n");
1978
1979         for (i = 0; i < 256; ++i)
1980         {
1981             col[i].rgbRed = palette->palents[i].peRed;
1982             col[i].rgbGreen = palette->palents[i].peGreen;
1983             col[i].rgbBlue = palette->palents[i].peBlue;
1984             col[i].rgbReserved = 0;
1985         }
1986         SetDIBColorTable(surface->hDC, 0, 256, col);
1987     }
1988
1989     /* Update the image because of the palette change. Some games like e.g.
1990      * Red Alert call SetEntries a lot to implement fading. */
1991     /* Tell the swapchain to update the screen. */
1992     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1993     {
1994         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1995         if (surface == swapchain->front_buffer)
1996         {
1997             x11_copy_to_screen(swapchain, NULL);
1998         }
1999     }
2000 }
2001
2002 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2003 {
2004     TRACE("surface %p, rect %s, flags %#x.\n",
2005             surface, wine_dbgstr_rect(rect), flags);
2006
2007     if (!(surface->flags & SFLAG_DIBSECTION))
2008     {
2009         HRESULT hr;
2010
2011         /* This happens on gdi surfaces if the application set a user pointer
2012          * and resets it. Recreate the DIB section. */
2013         if (FAILED(hr = surface_create_dib_section(surface)))
2014         {
2015             ERR("Failed to create dib section, hr %#x.\n", hr);
2016             return;
2017         }
2018         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
2019         surface->resource.heapMemory = NULL;
2020         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2021     }
2022 }
2023
2024 static void gdi_surface_unmap(struct wined3d_surface *surface)
2025 {
2026     TRACE("surface %p.\n", surface);
2027
2028     /* Tell the swapchain to update the screen. */
2029     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2030     {
2031         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2032         if (surface == swapchain->front_buffer)
2033         {
2034             x11_copy_to_screen(swapchain, &surface->lockedRect);
2035         }
2036     }
2037
2038     memset(&surface->lockedRect, 0, sizeof(RECT));
2039 }
2040
/* Surface operations table for GDI surfaces. The initializer is positional:
 * private setup, realize palette, map, unmap. */
static const struct wined3d_surface_ops gdi_surface_ops =
{
    gdi_surface_private_setup,
    gdi_surface_realize_palette,
    gdi_surface_map,
    gdi_surface_unmap,
};
2048
2049 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2050 {
2051     GLuint *name;
2052     DWORD flag;
2053
2054     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2055
2056     if(srgb)
2057     {
2058         name = &surface->texture_name_srgb;
2059         flag = SFLAG_INSRGBTEX;
2060     }
2061     else
2062     {
2063         name = &surface->texture_name;
2064         flag = SFLAG_INTEXTURE;
2065     }
2066
2067     if (!*name && new_name)
2068     {
2069         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2070          * surface has no texture name yet. See if we can get rid of this. */
2071         if (surface->flags & flag)
2072         {
2073             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2074             surface_modify_location(surface, flag, FALSE);
2075         }
2076     }
2077
2078     *name = new_name;
2079     surface_force_reload(surface);
2080 }
2081
2082 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target, GLint level)
2083 {
2084     TRACE("surface %p, target %#x.\n", surface, target);
2085
2086     if (surface->texture_target != target)
2087     {
2088         if (target == GL_TEXTURE_RECTANGLE_ARB)
2089         {
2090             surface->flags &= ~SFLAG_NORMCOORD;
2091         }
2092         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2093         {
2094             surface->flags |= SFLAG_NORMCOORD;
2095         }
2096     }
2097     surface->texture_target = target;
2098     surface->texture_level = level;
2099     surface_force_reload(surface);
2100 }
2101
2102 /* This call just downloads data, the caller is responsible for binding the
2103  * correct texture. */
2104 /* Context activation is done by the caller. */
2105 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2106 {
2107     const struct wined3d_format *format = surface->resource.format;
2108
2109     /* Only support read back of converted P8 surfaces. */
2110     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2111     {
2112         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2113         return;
2114     }
2115
2116     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2117     {
2118         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2119                 surface, surface->texture_level, format->glFormat, format->glType,
2120                 surface->resource.allocatedMemory);
2121
2122         if (surface->flags & SFLAG_PBO)
2123         {
2124             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2125             checkGLcall("glBindBufferARB");
2126             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2127             checkGLcall("glGetCompressedTexImageARB");
2128             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2129             checkGLcall("glBindBufferARB");
2130         }
2131         else
2132         {
2133             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2134                     surface->texture_level, surface->resource.allocatedMemory));
2135             checkGLcall("glGetCompressedTexImageARB");
2136         }
2137     }
2138     else
2139     {
2140         void *mem;
2141         GLenum gl_format = format->glFormat;
2142         GLenum gl_type = format->glType;
2143         int src_pitch = 0;
2144         int dst_pitch = 0;
2145
2146         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2147         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2148         {
2149             gl_format = GL_ALPHA;
2150             gl_type = GL_UNSIGNED_BYTE;
2151         }
2152
2153         if (surface->flags & SFLAG_NONPOW2)
2154         {
2155             unsigned char alignment = surface->resource.device->surface_alignment;
2156             src_pitch = format->byte_count * surface->pow2Width;
2157             dst_pitch = wined3d_surface_get_pitch(surface);
2158             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2159             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2160         }
2161         else
2162         {
2163             mem = surface->resource.allocatedMemory;
2164         }
2165
2166         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2167                 surface, surface->texture_level, gl_format, gl_type, mem);
2168
2169         if (surface->flags & SFLAG_PBO)
2170         {
2171             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2172             checkGLcall("glBindBufferARB");
2173
2174             gl_info->gl_ops.gl.p_glGetTexImage(surface->texture_target, surface->texture_level,
2175                     gl_format, gl_type, NULL);
2176             checkGLcall("glGetTexImage");
2177
2178             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2179             checkGLcall("glBindBufferARB");
2180         }
2181         else
2182         {
2183             gl_info->gl_ops.gl.p_glGetTexImage(surface->texture_target, surface->texture_level,
2184                     gl_format, gl_type, mem);
2185             checkGLcall("glGetTexImage");
2186         }
2187
2188         if (surface->flags & SFLAG_NONPOW2)
2189         {
2190             const BYTE *src_data;
2191             BYTE *dst_data;
2192             UINT y;
2193             /*
2194              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2195              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2196              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2197              *
2198              * We're doing this...
2199              *
2200              * instead of boxing the texture :
2201              * |<-texture width ->|  -->pow2width|   /\
2202              * |111111111111111111|              |   |
2203              * |222 Texture 222222| boxed empty  | texture height
2204              * |3333 Data 33333333|              |   |
2205              * |444444444444444444|              |   \/
2206              * -----------------------------------   |
2207              * |     boxed  empty | boxed empty  | pow2height
2208              * |                  |              |   \/
2209              * -----------------------------------
2210              *
2211              *
2212              * we're repacking the data to the expected texture width
2213              *
2214              * |<-texture width ->|  -->pow2width|   /\
2215              * |111111111111111111222222222222222|   |
2216              * |222333333333333333333444444444444| texture height
2217              * |444444                           |   |
2218              * |                                 |   \/
2219              * |                                 |   |
2220              * |            empty                | pow2height
2221              * |                                 |   \/
2222              * -----------------------------------
2223              *
2224              * == is the same as
2225              *
2226              * |<-texture width ->|    /\
2227              * |111111111111111111|
2228              * |222222222222222222|texture height
2229              * |333333333333333333|
2230              * |444444444444444444|    \/
2231              * --------------------
2232              *
2233              * this also means that any references to allocatedMemory should work with the data as if were a
2234              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2235              *
2236              * internally the texture is still stored in a boxed format so any references to textureName will
2237              * get a boxed texture with width pow2width and not a texture of width resource.width.
2238              *
2239              * Performance should not be an issue, because applications normally do not lock the surfaces when
2240              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2241              * and doesn't have to be re-read. */
2242             src_data = mem;
2243             dst_data = surface->resource.allocatedMemory;
2244             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2245             for (y = 1; y < surface->resource.height; ++y)
2246             {
2247                 /* skip the first row */
2248                 src_data += src_pitch;
2249                 dst_data += dst_pitch;
2250                 memcpy(dst_data, src_data, dst_pitch);
2251             }
2252
2253             HeapFree(GetProcessHeap(), 0, mem);
2254         }
2255     }
2256
2257     /* Surface has now been downloaded */
2258     surface->flags |= SFLAG_INSYSMEM;
2259 }
2260
2261 /* This call just uploads data, the caller is responsible for binding the
2262  * correct texture. */
2263 /* Context activation is done by the caller. */
2264 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2265         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2266         BOOL srgb, const struct wined3d_bo_address *data)
2267 {
2268     UINT update_w = src_rect->right - src_rect->left;
2269     UINT update_h = src_rect->bottom - src_rect->top;
2270
2271     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2272             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2273             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2274
2275     if (surface->resource.map_count)
2276     {
2277         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2278         surface->flags |= SFLAG_PIN_SYSMEM;
2279     }
2280
2281     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2282     {
2283         update_h *= format->height_scale.numerator;
2284         update_h /= format->height_scale.denominator;
2285     }
2286
2287     if (data->buffer_object)
2288     {
2289         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2290         checkGLcall("glBindBufferARB");
2291     }
2292
2293     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2294     {
2295         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2296         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2297         const BYTE *addr = data->addr;
2298         GLenum internal;
2299
2300         addr += (src_rect->top / format->block_height) * src_pitch;
2301         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2302
2303         if (srgb)
2304             internal = format->glGammaInternal;
2305         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2306             internal = format->rtInternal;
2307         else
2308             internal = format->glInternal;
2309
2310         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2311                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2312                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2313
2314         if (row_length == src_pitch)
2315         {
2316             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2317                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2318         }
2319         else
2320         {
2321             UINT row, y;
2322
2323             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2324              * can't use the unpack row length like below. */
2325             for (row = 0, y = dst_point->y; row < row_count; ++row)
2326             {
2327                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2328                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2329                 y += format->block_height;
2330                 addr += src_pitch;
2331             }
2332         }
2333         checkGLcall("glCompressedTexSubImage2DARB");
2334     }
2335     else
2336     {
2337         const BYTE *addr = data->addr;
2338
2339         addr += src_rect->top * src_pitch;
2340         addr += src_rect->left * format->byte_count;
2341
2342         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2343                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2344                 update_w, update_h, format->glFormat, format->glType, addr);
2345
2346         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2347         gl_info->gl_ops.gl.p_glTexSubImage2D(surface->texture_target, surface->texture_level,
2348                 dst_point->x, dst_point->y, update_w, update_h, format->glFormat, format->glType, addr);
2349         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2350         checkGLcall("glTexSubImage2D");
2351     }
2352
2353     if (data->buffer_object)
2354     {
2355         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2356         checkGLcall("glBindBufferARB");
2357     }
2358
2359     if (wined3d_settings.strict_draw_ordering)
2360         gl_info->gl_ops.gl.p_glFlush();
2361
2362     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2363     {
2364         struct wined3d_device *device = surface->resource.device;
2365         unsigned int i;
2366
2367         for (i = 0; i < device->context_count; ++i)
2368         {
2369             context_surface_update(device->contexts[i], surface);
2370         }
2371     }
2372 }
2373
2374 static HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck, BOOL use_texturing,
2375         struct wined3d_format *format, enum wined3d_conversion_type *conversion_type)
2376 {
2377     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2378     const struct wined3d_device *device = surface->resource.device;
2379     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
2380     BOOL blit_supported = FALSE;
2381
2382     /* Copy the default values from the surface. Below we might perform fixups */
2383     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
2384     *format = *surface->resource.format;
2385     *conversion_type = WINED3D_CT_NONE;
2386
2387     /* Ok, now look if we have to do any conversion */
2388     switch (surface->resource.format->id)
2389     {
2390         case WINED3DFMT_P8_UINT:
2391             /* Below the call to blit_supported is disabled for Wine 1.2
2392              * because the function isn't operating correctly yet. At the
2393              * moment 8-bit blits are handled in software and if certain GL
2394              * extensions are around, surface conversion is performed at
2395              * upload time. The blit_supported call recognizes it as a
2396              * destination fixup. This type of upload 'fixup' and 8-bit to
2397              * 8-bit blits need to be handled by the blit_shader.
2398              * TODO: get rid of this #if 0. */
2399 #if 0
2400             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
2401                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
2402                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
2403 #endif
2404             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
2405
2406             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
2407              * texturing. Further also use conversion in case of color keying.
2408              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
2409              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
2410              * conflicts with this.
2411              */
2412             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
2413                     || colorkey_active || !use_texturing)
2414             {
2415                 format->glFormat = GL_RGBA;
2416                 format->glInternal = GL_RGBA;
2417                 format->glType = GL_UNSIGNED_BYTE;
2418                 format->conv_byte_count = 4;
2419                 if (colorkey_active)
2420                     *conversion_type = WINED3D_CT_PALETTED_CK;
2421                 else
2422                     *conversion_type = WINED3D_CT_PALETTED;
2423             }
2424             break;
2425
2426         case WINED3DFMT_B2G3R3_UNORM:
2427             /* **********************
2428                 GL_UNSIGNED_BYTE_3_3_2
2429                 ********************** */
2430             if (colorkey_active) {
2431                 /* This texture format will never be used.. So do not care about color keying
2432                     up until the point in time it will be needed :-) */
2433                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
2434             }
2435             break;
2436
2437         case WINED3DFMT_B5G6R5_UNORM:
2438             if (colorkey_active)
2439             {
2440                 *conversion_type = WINED3D_CT_CK_565;
2441                 format->glFormat = GL_RGBA;
2442                 format->glInternal = GL_RGB5_A1;
2443                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
2444                 format->conv_byte_count = 2;
2445             }
2446             break;
2447
2448         case WINED3DFMT_B5G5R5X1_UNORM:
2449             if (colorkey_active)
2450             {
2451                 *conversion_type = WINED3D_CT_CK_5551;
2452                 format->glFormat = GL_BGRA;
2453                 format->glInternal = GL_RGB5_A1;
2454                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
2455                 format->conv_byte_count = 2;
2456             }
2457             break;
2458
2459         case WINED3DFMT_B8G8R8_UNORM:
2460             if (colorkey_active)
2461             {
2462                 *conversion_type = WINED3D_CT_CK_RGB24;
2463                 format->glFormat = GL_RGBA;
2464                 format->glInternal = GL_RGBA8;
2465                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2466                 format->conv_byte_count = 4;
2467             }
2468             break;
2469
2470         case WINED3DFMT_B8G8R8X8_UNORM:
2471             if (colorkey_active)
2472             {
2473                 *conversion_type = WINED3D_CT_RGB32_888;
2474                 format->glFormat = GL_RGBA;
2475                 format->glInternal = GL_RGBA8;
2476                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2477                 format->conv_byte_count = 4;
2478             }
2479             break;
2480
2481         case WINED3DFMT_B8G8R8A8_UNORM:
2482             if (colorkey_active)
2483             {
2484                 *conversion_type = WINED3D_CT_CK_ARGB32;
2485                 format->conv_byte_count = 4;
2486             }
2487             break;
2488
2489         default:
2490             break;
2491     }
2492
2493     if (*conversion_type != WINED3D_CT_NONE)
2494     {
2495         format->rtInternal = format->glInternal;
2496         format->glGammaInternal = format->glInternal;
2497     }
2498
2499     return WINED3D_OK;
2500 }
2501
2502 static BOOL surface_check_block_align(struct wined3d_surface *surface, const RECT *rect)
2503 {
2504     UINT width_mask, height_mask;
2505
2506     if (!rect->left && !rect->top
2507             && rect->right == surface->resource.width
2508             && rect->bottom == surface->resource.height)
2509         return TRUE;
2510
2511     /* This assumes power of two block sizes, but NPOT block sizes would be
2512      * silly anyway. */
2513     width_mask = surface->resource.format->block_width - 1;
2514     height_mask = surface->resource.format->block_height - 1;
2515
2516     if (!(rect->left & width_mask) && !(rect->top & height_mask)
2517             && !(rect->right & width_mask) && !(rect->bottom & height_mask))
2518         return TRUE;
2519
2520     return FALSE;
2521 }
2522
2523 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2524         struct wined3d_surface *src_surface, const RECT *src_rect)
2525 {
2526     const struct wined3d_format *src_format;
2527     const struct wined3d_format *dst_format;
2528     const struct wined3d_gl_info *gl_info;
2529     enum wined3d_conversion_type convert;
2530     struct wined3d_context *context;
2531     struct wined3d_bo_address data;
2532     struct wined3d_format format;
2533     UINT update_w, update_h;
2534     UINT dst_w, dst_h;
2535     RECT r, dst_rect;
2536     UINT src_pitch;
2537     POINT p;
2538
2539     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2540             dst_surface, wine_dbgstr_point(dst_point),
2541             src_surface, wine_dbgstr_rect(src_rect));
2542
2543     src_format = src_surface->resource.format;
2544     dst_format = dst_surface->resource.format;
2545
2546     if (src_format->id != dst_format->id)
2547     {
2548         WARN("Source and destination surfaces should have the same format.\n");
2549         return WINED3DERR_INVALIDCALL;
2550     }
2551
2552     if (!dst_point)
2553     {
2554         p.x = 0;
2555         p.y = 0;
2556         dst_point = &p;
2557     }
2558     else if (dst_point->x < 0 || dst_point->y < 0)
2559     {
2560         WARN("Invalid destination point.\n");
2561         return WINED3DERR_INVALIDCALL;
2562     }
2563
2564     if (!src_rect)
2565     {
2566         r.left = 0;
2567         r.top = 0;
2568         r.right = src_surface->resource.width;
2569         r.bottom = src_surface->resource.height;
2570         src_rect = &r;
2571     }
2572     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2573             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2574     {
2575         WARN("Invalid source rectangle.\n");
2576         return WINED3DERR_INVALIDCALL;
2577     }
2578
2579     dst_w = dst_surface->resource.width;
2580     dst_h = dst_surface->resource.height;
2581
2582     update_w = src_rect->right - src_rect->left;
2583     update_h = src_rect->bottom - src_rect->top;
2584
2585     if (update_w > dst_w || dst_point->x > dst_w - update_w
2586             || update_h > dst_h || dst_point->y > dst_h - update_h)
2587     {
2588         WARN("Destination out of bounds.\n");
2589         return WINED3DERR_INVALIDCALL;
2590     }
2591
2592     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS) && !surface_check_block_align(src_surface, src_rect))
2593     {
2594         WARN("Source rectangle not block-aligned.\n");
2595         return WINED3DERR_INVALIDCALL;
2596     }
2597
2598     SetRect(&dst_rect, dst_point->x, dst_point->y, dst_point->x + update_w, dst_point->y + update_h);
2599     if ((dst_format->flags & WINED3DFMT_FLAG_BLOCKS) && !surface_check_block_align(dst_surface, &dst_rect))
2600     {
2601         WARN("Destination rectangle not block-aligned.\n");
2602         return WINED3DERR_INVALIDCALL;
2603     }
2604
2605     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2606     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2607     if (convert != WINED3D_CT_NONE || format.convert)
2608         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2609
2610     context = context_acquire(dst_surface->resource.device, NULL);
2611     gl_info = context->gl_info;
2612
2613     /* Only load the surface for partial updates. For newly allocated texture
2614      * the texture wouldn't be the current location, and we'd upload zeroes
2615      * just to overwrite them again. */
2616     if (update_w == dst_w && update_h == dst_h)
2617         surface_prepare_texture(dst_surface, context, FALSE);
2618     else
2619         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2620     surface_bind(dst_surface, context, FALSE);
2621
2622     data.buffer_object = src_surface->pbo;
2623     data.addr = src_surface->resource.allocatedMemory;
2624     src_pitch = wined3d_surface_get_pitch(src_surface);
2625
2626     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2627
2628     invalidate_active_texture(dst_surface->resource.device, context);
2629
2630     context_release(context);
2631
2632     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2633     return WINED3D_OK;
2634 }
2635
2636 /* This call just allocates the texture, the caller is responsible for binding
2637  * the correct texture. */
2638 /* Context activation is done by the caller. */
2639 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2640         const struct wined3d_format *format, BOOL srgb)
2641 {
2642     BOOL enable_client_storage = FALSE;
2643     GLsizei width = surface->pow2Width;
2644     GLsizei height = surface->pow2Height;
2645     const BYTE *mem = NULL;
2646     GLenum internal;
2647
2648     if (srgb)
2649     {
2650         internal = format->glGammaInternal;
2651     }
2652     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2653     {
2654         internal = format->rtInternal;
2655     }
2656     else
2657     {
2658         internal = format->glInternal;
2659     }
2660
2661     if (!internal)
2662         FIXME("No GL internal format for format %s.\n", debug_d3dformat(format->id));
2663
2664     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2665     {
2666         height *= format->height_scale.numerator;
2667         height /= format->height_scale.denominator;
2668     }
2669
2670     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2671             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2672             internal, width, height, format->glFormat, format->glType);
2673
2674     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2675     {
2676         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2677                 || !surface->resource.allocatedMemory)
2678         {
2679             /* In some cases we want to disable client storage.
2680              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2681              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2682              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2683              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2684              */
2685             gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2686             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2687             surface->flags &= ~SFLAG_CLIENT;
2688             enable_client_storage = TRUE;
2689         }
2690         else
2691         {
2692             surface->flags |= SFLAG_CLIENT;
2693
2694             /* Point OpenGL to our allocated texture memory. Do not use
2695              * resource.allocatedMemory here because it might point into a
2696              * PBO. Instead use heapMemory, but get the alignment right. */
2697             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2698                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2699         }
2700     }
2701
2702     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2703     {
2704         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2705                 internal, width, height, 0, surface->resource.size, mem));
2706         checkGLcall("glCompressedTexImage2DARB");
2707     }
2708     else
2709     {
2710         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
2711                 internal, width, height, 0, format->glFormat, format->glType, mem);
2712         checkGLcall("glTexImage2D");
2713     }
2714
2715     if (enable_client_storage)
2716     {
2717         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2718         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2719     }
2720 }
2721
2722 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2723  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2724 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2725 /* Context activation is done by the caller. */
2726 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2727 {
2728     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2729     struct wined3d_renderbuffer_entry *entry;
2730     GLuint renderbuffer = 0;
2731     unsigned int src_width, src_height;
2732     unsigned int width, height;
2733
2734     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2735     {
2736         width = rt->pow2Width;
2737         height = rt->pow2Height;
2738     }
2739     else
2740     {
2741         width = surface->pow2Width;
2742         height = surface->pow2Height;
2743     }
2744
2745     src_width = surface->pow2Width;
2746     src_height = surface->pow2Height;
2747
2748     /* A depth stencil smaller than the render target is not valid */
2749     if (width > src_width || height > src_height) return;
2750
2751     /* Remove any renderbuffer set if the sizes match */
2752     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2753             || (width == src_width && height == src_height))
2754     {
2755         surface->current_renderbuffer = NULL;
2756         return;
2757     }
2758
2759     /* Look if we've already got a renderbuffer of the correct dimensions */
2760     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2761     {
2762         if (entry->width == width && entry->height == height)
2763         {
2764             renderbuffer = entry->id;
2765             surface->current_renderbuffer = entry;
2766             break;
2767         }
2768     }
2769
2770     if (!renderbuffer)
2771     {
2772         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2773         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2774         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2775                 surface->resource.format->glInternal, width, height);
2776
2777         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2778         entry->width = width;
2779         entry->height = height;
2780         entry->id = renderbuffer;
2781         list_add_head(&surface->renderbuffers, &entry->entry);
2782
2783         surface->current_renderbuffer = entry;
2784     }
2785
2786     checkGLcall("set_compatible_renderbuffer");
2787 }
2788
2789 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2790 {
2791     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2792
2793     TRACE("surface %p.\n", surface);
2794
2795     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2796     {
2797         ERR("Surface %p is not on a swapchain.\n", surface);
2798         return GL_NONE;
2799     }
2800
2801     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2802     {
2803         if (swapchain->render_to_fbo)
2804         {
2805             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2806             return GL_COLOR_ATTACHMENT0;
2807         }
2808         TRACE("Returning GL_BACK\n");
2809         return GL_BACK;
2810     }
2811     else if (surface == swapchain->front_buffer)
2812     {
2813         TRACE("Returning GL_FRONT\n");
2814         return GL_FRONT;
2815     }
2816
2817     FIXME("Higher back buffer, returning GL_BACK\n");
2818     return GL_BACK;
2819 }
2820
2821 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2822 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2823 {
2824     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2825
2826     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2827         /* No partial locking for textures yet. */
2828         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2829
2830     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2831     if (dirty_rect)
2832     {
2833         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2834         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2835         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2836         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2837     }
2838     else
2839     {
2840         surface->dirtyRect.left = 0;
2841         surface->dirtyRect.top = 0;
2842         surface->dirtyRect.right = surface->resource.width;
2843         surface->dirtyRect.bottom = surface->resource.height;
2844     }
2845
2846     /* if the container is a texture then mark it dirty. */
2847     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2848     {
2849         TRACE("Passing to container.\n");
2850         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2851     }
2852 }
2853
2854 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2855 {
2856     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2857     BOOL ck_changed;
2858
2859     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2860
2861     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2862     {
2863         ERR("Not supported on scratch surfaces.\n");
2864         return WINED3DERR_INVALIDCALL;
2865     }
2866
2867     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2868
2869     /* Reload if either the texture and sysmem have different ideas about the
2870      * color key, or the actual key values changed. */
2871     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2872             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2873             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2874     {
2875         TRACE("Reloading because of color keying\n");
2876         /* To perform the color key conversion we need a sysmem copy of
2877          * the surface. Make sure we have it. */
2878
2879         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2880         /* Make sure the texture is reloaded because of the color key change,
2881          * this kills performance though :( */
2882         /* TODO: This is not necessarily needed with hw palettized texture support. */
2883         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2884         /* Switching color keying on / off may change the internal format. */
2885         if (ck_changed)
2886             surface_force_reload(surface);
2887     }
2888     else if (!(surface->flags & flag))
2889     {
2890         TRACE("Reloading because surface is dirty.\n");
2891     }
2892     else
2893     {
2894         TRACE("surface is already in texture\n");
2895         return WINED3D_OK;
2896     }
2897
2898     /* No partial locking for textures yet. */
2899     surface_load_location(surface, flag, NULL);
2900     surface_evict_sysmem(surface);
2901
2902     return WINED3D_OK;
2903 }
2904
/* Convert a 32-bit float to the bit pattern of a 16-bit half float
 * (1 sign bit, 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) yields 0x0000, NaN yields 0x7c01, infinities and
 * values too large for a half yield a signed infinity. Normal values are
 * rounded to nearest; values too small for a normalized half are turned
 * into denormals by truncation, and eventually into zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    /* Bounds of the range holding an 11 bit mantissa, i.e. 10 stored bits
     * plus the implicit leading one. These equal powf(2, 10) and
     * powf(2, 11) exactly. */
    static const float mantissa_min = 1024.0f;
    static const float mantissa_max = 2048.0f;
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), tracking the power of two that
     * was factored out. */
    if (tmp < mantissa_min)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < mantissa_min);
    }
    else if (tmp >= mantissa_max)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= mantissa_max);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 bit mantissa. Renormalize in that
     * case; otherwise the carry is lost when the mantissa is masked below
     * and the result is off by a factor of two. */
    if (mantissa == 0x800)
    {
        mantissa >>= 1;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2967
2968 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2969 {
2970     ULONG refcount;
2971
2972     TRACE("Surface %p, container %p of type %#x.\n",
2973             surface, surface->container.u.base, surface->container.type);
2974
2975     switch (surface->container.type)
2976     {
2977         case WINED3D_CONTAINER_TEXTURE:
2978             return wined3d_texture_incref(surface->container.u.texture);
2979
2980         case WINED3D_CONTAINER_SWAPCHAIN:
2981             return wined3d_swapchain_incref(surface->container.u.swapchain);
2982
2983         default:
2984             ERR("Unhandled container type %#x.\n", surface->container.type);
2985         case WINED3D_CONTAINER_NONE:
2986             break;
2987     }
2988
2989     refcount = InterlockedIncrement(&surface->resource.ref);
2990     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2991
2992     return refcount;
2993 }
2994
2995 /* Do not call while under the GL lock. */
2996 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2997 {
2998     ULONG refcount;
2999
3000     TRACE("Surface %p, container %p of type %#x.\n",
3001             surface, surface->container.u.base, surface->container.type);
3002
3003     switch (surface->container.type)
3004     {
3005         case WINED3D_CONTAINER_TEXTURE:
3006             return wined3d_texture_decref(surface->container.u.texture);
3007
3008         case WINED3D_CONTAINER_SWAPCHAIN:
3009             return wined3d_swapchain_decref(surface->container.u.swapchain);
3010
3011         default:
3012             ERR("Unhandled container type %#x.\n", surface->container.type);
3013         case WINED3D_CONTAINER_NONE:
3014             break;
3015     }
3016
3017     refcount = InterlockedDecrement(&surface->resource.ref);
3018     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
3019
3020     if (!refcount)
3021     {
3022         surface_cleanup(surface);
3023         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
3024
3025         TRACE("Destroyed surface %p.\n", surface);
3026         HeapFree(GetProcessHeap(), 0, surface);
3027     }
3028
3029     return refcount;
3030 }
3031
3032 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
3033 {
3034     return resource_set_priority(&surface->resource, priority);
3035 }
3036
3037 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
3038 {
3039     return resource_get_priority(&surface->resource);
3040 }
3041
3042 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
3043 {
3044     TRACE("surface %p.\n", surface);
3045
3046     if (!surface->resource.device->d3d_initialized)
3047     {
3048         ERR("D3D not initialized.\n");
3049         return;
3050     }
3051
3052     surface_internal_preload(surface, SRGB_ANY);
3053 }
3054
3055 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
3056 {
3057     TRACE("surface %p.\n", surface);
3058
3059     return surface->resource.parent;
3060 }
3061
3062 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
3063 {
3064     TRACE("surface %p.\n", surface);
3065
3066     return &surface->resource;
3067 }
3068
3069 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
3070 {
3071     TRACE("surface %p, flags %#x.\n", surface, flags);
3072
3073     switch (flags)
3074     {
3075         case WINEDDGBS_CANBLT:
3076         case WINEDDGBS_ISBLTDONE:
3077             return WINED3D_OK;
3078
3079         default:
3080             return WINED3DERR_INVALIDCALL;
3081     }
3082 }
3083
3084 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
3085 {
3086     TRACE("surface %p, flags %#x.\n", surface, flags);
3087
3088     /* XXX: DDERR_INVALIDSURFACETYPE */
3089
3090     switch (flags)
3091     {
3092         case WINEDDGFS_CANFLIP:
3093         case WINEDDGFS_ISFLIPDONE:
3094             return WINED3D_OK;
3095
3096         default:
3097             return WINED3DERR_INVALIDCALL;
3098     }
3099 }
3100
3101 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
3102 {
3103     TRACE("surface %p.\n", surface);
3104
3105     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
3106     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
3107 }
3108
3109 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
3110 {
3111     TRACE("surface %p.\n", surface);
3112
3113     surface->flags &= ~SFLAG_LOST;
3114     return WINED3D_OK;
3115 }
3116
3117 void CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
3118 {
3119     TRACE("surface %p, palette %p.\n", surface, palette);
3120
3121     if (surface->palette == palette)
3122     {
3123         TRACE("Nop palette change.\n");
3124         return;
3125     }
3126
3127     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3128         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3129
3130     surface->palette = palette;
3131
3132     if (palette)
3133     {
3134         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3135             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3136
3137         surface->surface_ops->surface_realize_palette(surface);
3138     }
3139 }
3140
3141 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3142         DWORD flags, const struct wined3d_color_key *color_key)
3143 {
3144     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3145
3146     if (flags & WINEDDCKEY_COLORSPACE)
3147     {
3148         FIXME(" colorkey value not supported (%08x) !\n", flags);
3149         return WINED3DERR_INVALIDCALL;
3150     }
3151
3152     /* Dirtify the surface, but only if a key was changed. */
3153     if (color_key)
3154     {
3155         switch (flags & ~WINEDDCKEY_COLORSPACE)
3156         {
3157             case WINEDDCKEY_DESTBLT:
3158                 surface->dst_blt_color_key = *color_key;
3159                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3160                 break;
3161
3162             case WINEDDCKEY_DESTOVERLAY:
3163                 surface->dst_overlay_color_key = *color_key;
3164                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3165                 break;
3166
3167             case WINEDDCKEY_SRCOVERLAY:
3168                 surface->src_overlay_color_key = *color_key;
3169                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3170                 break;
3171
3172             case WINEDDCKEY_SRCBLT:
3173                 surface->src_blt_color_key = *color_key;
3174                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3175                 break;
3176         }
3177     }
3178     else
3179     {
3180         switch (flags & ~WINEDDCKEY_COLORSPACE)
3181         {
3182             case WINEDDCKEY_DESTBLT:
3183                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3184                 break;
3185
3186             case WINEDDCKEY_DESTOVERLAY:
3187                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3188                 break;
3189
3190             case WINEDDCKEY_SRCOVERLAY:
3191                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3192                 break;
3193
3194             case WINEDDCKEY_SRCBLT:
3195                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3196                 break;
3197         }
3198     }
3199
3200     return WINED3D_OK;
3201 }
3202
3203 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3204 {
3205     TRACE("surface %p.\n", surface);
3206
3207     return surface->palette;
3208 }
3209
3210 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3211 {
3212     const struct wined3d_format *format = surface->resource.format;
3213     DWORD pitch;
3214
3215     TRACE("surface %p.\n", surface);
3216
3217     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3218     {
3219         /* Since compressed formats are block based, pitch means the amount of
3220          * bytes to the next row of block rather than the next row of pixels. */
3221         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3222         pitch = row_block_count * format->block_byte_count;
3223     }
3224     else
3225     {
3226         unsigned char alignment = surface->resource.device->surface_alignment;
3227         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3228         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3229     }
3230
3231     TRACE("Returning %u.\n", pitch);
3232
3233     return pitch;
3234 }
3235
3236 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3237 {
3238     TRACE("surface %p, mem %p.\n", surface, mem);
3239
3240     if (surface->resource.map_count || (surface->flags & SFLAG_DCINUSE))
3241     {
3242         WARN("Surface is mapped or the DC is in use.\n");
3243         return WINED3DERR_INVALIDCALL;
3244     }
3245
3246     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3247     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3248     {
3249         ERR("Not supported on render targets.\n");
3250         return WINED3DERR_INVALIDCALL;
3251     }
3252
3253     if (mem && mem != surface->resource.allocatedMemory)
3254     {
3255         void *release = NULL;
3256
3257         /* Do I have to copy the old surface content? */
3258         if (surface->flags & SFLAG_DIBSECTION)
3259         {
3260             DeleteDC(surface->hDC);
3261             DeleteObject(surface->dib.DIBsection);
3262             surface->dib.bitmap_data = NULL;
3263             surface->resource.allocatedMemory = NULL;
3264             surface->hDC = NULL;
3265             surface->flags &= ~SFLAG_DIBSECTION;
3266         }
3267         else if (!(surface->flags & SFLAG_USERPTR))
3268         {
3269             release = surface->resource.heapMemory;
3270             surface->resource.heapMemory = NULL;
3271         }
3272         surface->resource.allocatedMemory = mem;
3273         surface->flags |= SFLAG_USERPTR;
3274
3275         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3276         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3277
3278         /* For client textures OpenGL has to be notified. */
3279         if (surface->flags & SFLAG_CLIENT)
3280             surface_release_client_storage(surface);
3281
3282         /* Now free the old memory if any. */
3283         HeapFree(GetProcessHeap(), 0, release);
3284     }
3285     else if (surface->flags & SFLAG_USERPTR)
3286     {
3287         /* HeapMemory should be NULL already. */
3288         if (surface->resource.heapMemory)
3289             ERR("User pointer surface has heap memory allocated.\n");
3290
3291         if (!mem)
3292         {
3293             surface->resource.allocatedMemory = NULL;
3294             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3295
3296             if (surface->flags & SFLAG_CLIENT)
3297                 surface_release_client_storage(surface);
3298
3299             surface_prepare_system_memory(surface);
3300         }
3301
3302         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3303     }
3304
3305     return WINED3D_OK;
3306 }
3307
3308 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3309 {
3310     LONG w, h;
3311
3312     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3313
3314     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3315     {
3316         WARN("Not an overlay surface.\n");
3317         return WINEDDERR_NOTAOVERLAYSURFACE;
3318     }
3319
3320     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3321     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3322     surface->overlay_destrect.left = x;
3323     surface->overlay_destrect.top = y;
3324     surface->overlay_destrect.right = x + w;
3325     surface->overlay_destrect.bottom = y + h;
3326
3327     surface_draw_overlay(surface);
3328
3329     return WINED3D_OK;
3330 }
3331
3332 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3333 {
3334     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3335
3336     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3337     {
3338         TRACE("Not an overlay surface.\n");
3339         return WINEDDERR_NOTAOVERLAYSURFACE;
3340     }
3341
3342     if (!surface->overlay_dest)
3343     {
3344         TRACE("Overlay not visible.\n");
3345         *x = 0;
3346         *y = 0;
3347         return WINEDDERR_OVERLAYNOTVISIBLE;
3348     }
3349
3350     *x = surface->overlay_destrect.left;
3351     *y = surface->overlay_destrect.top;
3352
3353     TRACE("Returning position %d, %d.\n", *x, *y);
3354
3355     return WINED3D_OK;
3356 }
3357
3358 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3359         DWORD flags, struct wined3d_surface *ref)
3360 {
3361     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3362
3363     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3364     {
3365         TRACE("Not an overlay surface.\n");
3366         return WINEDDERR_NOTAOVERLAYSURFACE;
3367     }
3368
3369     return WINED3D_OK;
3370 }
3371
3372 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3373         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3374 {
3375     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3376             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3377
3378     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3379     {
3380         WARN("Not an overlay surface.\n");
3381         return WINEDDERR_NOTAOVERLAYSURFACE;
3382     }
3383     else if (!dst_surface)
3384     {
3385         WARN("Dest surface is NULL.\n");
3386         return WINED3DERR_INVALIDCALL;
3387     }
3388
3389     if (src_rect)
3390     {
3391         surface->overlay_srcrect = *src_rect;
3392     }
3393     else
3394     {
3395         surface->overlay_srcrect.left = 0;
3396         surface->overlay_srcrect.top = 0;
3397         surface->overlay_srcrect.right = surface->resource.width;
3398         surface->overlay_srcrect.bottom = surface->resource.height;
3399     }
3400
3401     if (dst_rect)
3402     {
3403         surface->overlay_destrect = *dst_rect;
3404     }
3405     else
3406     {
3407         surface->overlay_destrect.left = 0;
3408         surface->overlay_destrect.top = 0;
3409         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3410         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3411     }
3412
3413     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3414     {
3415         surface->overlay_dest = NULL;
3416         list_remove(&surface->overlay_entry);
3417     }
3418
3419     if (flags & WINEDDOVER_SHOW)
3420     {
3421         if (surface->overlay_dest != dst_surface)
3422         {
3423             surface->overlay_dest = dst_surface;
3424             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3425         }
3426     }
3427     else if (flags & WINEDDOVER_HIDE)
3428     {
3429         /* tests show that the rectangles are erased on hide */
3430         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3431         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3432         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3433         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3434         surface->overlay_dest = NULL;
3435     }
3436
3437     surface_draw_overlay(surface);
3438
3439     return WINED3D_OK;
3440 }
3441
3442 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3443         UINT width, UINT height, enum wined3d_format_id format_id,
3444         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3445 {
3446     struct wined3d_device *device = surface->resource.device;
3447     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3448     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3449     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3450
3451     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3452             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3453
3454     if (!resource_size)
3455         return WINED3DERR_INVALIDCALL;
3456
3457     if (device->d3d_initialized)
3458         surface->resource.resource_ops->resource_unload(&surface->resource);
3459
3460     if (surface->flags & SFLAG_DIBSECTION)
3461     {
3462         DeleteDC(surface->hDC);
3463         DeleteObject(surface->dib.DIBsection);
3464         surface->dib.bitmap_data = NULL;
3465         surface->flags &= ~SFLAG_DIBSECTION;
3466     }
3467
3468     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3469     surface->resource.allocatedMemory = NULL;
3470     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3471     surface->resource.heapMemory = NULL;
3472
3473     surface->resource.width = width;
3474     surface->resource.height = height;
3475     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3476             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3477     {
3478         surface->pow2Width = width;
3479         surface->pow2Height = height;
3480     }
3481     else
3482     {
3483         surface->pow2Width = surface->pow2Height = 1;
3484         while (surface->pow2Width < width)
3485             surface->pow2Width <<= 1;
3486         while (surface->pow2Height < height)
3487             surface->pow2Height <<= 1;
3488     }
3489
3490     if (surface->pow2Width != width || surface->pow2Height != height)
3491         surface->flags |= SFLAG_NONPOW2;
3492     else
3493         surface->flags &= ~SFLAG_NONPOW2;
3494
3495     surface->resource.format = format;
3496     surface->resource.multisample_type = multisample_type;
3497     surface->resource.multisample_quality = multisample_quality;
3498     surface->resource.size = resource_size;
3499
3500     if (!surface_init_sysmem(surface))
3501         return E_OUTOFMEMORY;
3502
3503     return WINED3D_OK;
3504 }
3505
3506 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3507         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3508 {
3509     unsigned short *dst_s;
3510     const float *src_f;
3511     unsigned int x, y;
3512
3513     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3514
3515     for (y = 0; y < h; ++y)
3516     {
3517         src_f = (const float *)(src + y * pitch_in);
3518         dst_s = (unsigned short *) (dst + y * pitch_out);
3519         for (x = 0; x < w; ++x)
3520         {
3521             dst_s[x] = float_32_to_16(src_f + x);
3522         }
3523     }
3524 }
3525
3526 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3527         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3528 {
3529     static const unsigned char convert_5to8[] =
3530     {
3531         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3532         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3533         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3534         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3535     };
3536     static const unsigned char convert_6to8[] =
3537     {
3538         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3539         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3540         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3541         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3542         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3543         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3544         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3545         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3546     };
3547     unsigned int x, y;
3548
3549     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3550
3551     for (y = 0; y < h; ++y)
3552     {
3553         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3554         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3555         for (x = 0; x < w; ++x)
3556         {
3557             WORD pixel = src_line[x];
3558             dst_line[x] = 0xff000000
3559                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3560                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3561                     | convert_5to8[(pixel & 0x001f)];
3562         }
3563     }
3564 }
3565
3566 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3567  * in both cases we're just setting the X / Alpha channel to 0xff. */
3568 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3569         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3570 {
3571     unsigned int x, y;
3572
3573     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3574
3575     for (y = 0; y < h; ++y)
3576     {
3577         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3578         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3579
3580         for (x = 0; x < w; ++x)
3581         {
3582             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3583         }
3584     }
3585 }
3586
3587 static inline BYTE cliptobyte(int x)
3588 {
3589     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3590 }
3591
3592 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3593         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3594 {
3595     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3596     unsigned int x, y;
3597
3598     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3599
3600     for (y = 0; y < h; ++y)
3601     {
3602         const BYTE *src_line = src + y * pitch_in;
3603         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3604         for (x = 0; x < w; ++x)
3605         {
3606             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3607              *     C = Y - 16; D = U - 128; E = V - 128;
3608              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3609              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3610              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3611              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3612              * U and V are shared between the pixels. */
3613             if (!(x & 1)) /* For every even pixel, read new U and V. */
3614             {
3615                 d = (int) src_line[1] - 128;
3616                 e = (int) src_line[3] - 128;
3617                 r2 = 409 * e + 128;
3618                 g2 = - 100 * d - 208 * e + 128;
3619                 b2 = 516 * d + 128;
3620             }
3621             c2 = 298 * ((int) src_line[0] - 16);
3622             dst_line[x] = 0xff000000
3623                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3624                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3625                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3626                 /* Scale RGB values to 0..255 range,
3627                  * then clip them if still not in range (may be negative),
3628                  * then shift them within DWORD if necessary. */
3629             src_line += 2;
3630         }
3631     }
3632 }
3633
3634 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3635         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3636 {
3637     unsigned int x, y;
3638     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3639
3640     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3641
3642     for (y = 0; y < h; ++y)
3643     {
3644         const BYTE *src_line = src + y * pitch_in;
3645         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3646         for (x = 0; x < w; ++x)
3647         {
3648             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3649              *     C = Y - 16; D = U - 128; E = V - 128;
3650              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3651              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3652              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3653              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3654              * U and V are shared between the pixels. */
3655             if (!(x & 1)) /* For every even pixel, read new U and V. */
3656             {
3657                 d = (int) src_line[1] - 128;
3658                 e = (int) src_line[3] - 128;
3659                 r2 = 409 * e + 128;
3660                 g2 = - 100 * d - 208 * e + 128;
3661                 b2 = 516 * d + 128;
3662             }
3663             c2 = 298 * ((int) src_line[0] - 16);
3664             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3665                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3666                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3667                 /* Scale RGB values to 0..255 range,
3668                  * then clip them if still not in range (may be negative),
3669                  * then shift them within DWORD if necessary. */
3670             src_line += 2;
3671         }
3672     }
3673 }
3674
3675 struct d3dfmt_converter_desc
3676 {
3677     enum wined3d_format_id from, to;
3678     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3679 };
3680
3681 static const struct d3dfmt_converter_desc converters[] =
3682 {
3683     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3684     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3685     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3686     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3687     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3688     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3689 };
3690
3691 static inline const struct d3dfmt_converter_desc *find_converter(enum wined3d_format_id from,
3692         enum wined3d_format_id to)
3693 {
3694     unsigned int i;
3695
3696     for (i = 0; i < (sizeof(converters) / sizeof(*converters)); ++i)
3697     {
3698         if (converters[i].from == from && converters[i].to == to)
3699             return &converters[i];
3700     }
3701
3702     return NULL;
3703 }
3704
3705 /*****************************************************************************
3706  * surface_convert_format
3707  *
3708  * Creates a duplicate of a surface in a different format. Is used by Blt to
3709  * blit between surfaces with different formats.
3710  *
3711  * Parameters
3712  *  source: Source surface
3713  *  fmt: Requested destination format
3714  *
3715  *****************************************************************************/
3716 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3717 {
3718     struct wined3d_map_desc src_map, dst_map;
3719     const struct d3dfmt_converter_desc *conv;
3720     struct wined3d_surface *ret = NULL;
3721     HRESULT hr;
3722
3723     conv = find_converter(source->resource.format->id, to_fmt);
3724     if (!conv)
3725     {
3726         FIXME("Cannot find a conversion function from format %s to %s.\n",
3727                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3728         return NULL;
3729     }
3730
3731     /* FIXME: Multisampled conversion? */
3732     if (FAILED(hr = wined3d_surface_create(source->resource.device, source->resource.width, source->resource.height,
3733             to_fmt, 0, WINED3D_POOL_SCRATCH, WINED3D_MULTISAMPLE_NONE, 0,
3734             WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD, NULL, &wined3d_null_parent_ops, &ret)))
3735     {
3736         ERR("Failed to create a destination surface for conversion.\n");
3737         return NULL;
3738     }
3739
3740     memset(&src_map, 0, sizeof(src_map));
3741     memset(&dst_map, 0, sizeof(dst_map));
3742
3743     if (FAILED(hr = wined3d_surface_map(source, &src_map, NULL, WINED3D_MAP_READONLY)))
3744     {
3745         ERR("Failed to lock the source surface.\n");
3746         wined3d_surface_decref(ret);
3747         return NULL;
3748     }
3749     if (FAILED(hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3D_MAP_READONLY)))
3750     {
3751         ERR("Failed to lock the destination surface.\n");
3752         wined3d_surface_unmap(source);
3753         wined3d_surface_decref(ret);
3754         return NULL;
3755     }
3756
3757     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3758             source->resource.width, source->resource.height);
3759
3760     wined3d_surface_unmap(ret);
3761     wined3d_surface_unmap(source);
3762
3763     return ret;
3764 }
3765
3766 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3767         unsigned int bpp, UINT pitch, DWORD color)
3768 {
3769     BYTE *first;
3770     unsigned int x, y;
3771
3772     /* Do first row */
3773
3774 #define COLORFILL_ROW(type) \
3775 do { \
3776     type *d = (type *)buf; \
3777     for (x = 0; x < width; ++x) \
3778         d[x] = (type)color; \
3779 } while(0)
3780
3781     switch (bpp)
3782     {
3783         case 1:
3784             COLORFILL_ROW(BYTE);
3785             break;
3786
3787         case 2:
3788             COLORFILL_ROW(WORD);
3789             break;
3790
3791         case 3:
3792         {
3793             BYTE *d = buf;
3794             for (x = 0; x < width; ++x, d += 3)
3795             {
3796                 d[0] = (color      ) & 0xff;
3797                 d[1] = (color >>  8) & 0xff;
3798                 d[2] = (color >> 16) & 0xff;
3799             }
3800             break;
3801         }
3802         case 4:
3803             COLORFILL_ROW(DWORD);
3804             break;
3805
3806         default:
3807             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3808             return WINED3DERR_NOTAVAILABLE;
3809     }
3810
3811 #undef COLORFILL_ROW
3812
3813     /* Now copy first row. */
3814     first = buf;
3815     for (y = 1; y < height; ++y)
3816     {
3817         buf += pitch;
3818         memcpy(buf, first, width * bpp);
3819     }
3820
3821     return WINED3D_OK;
3822 }
3823
3824 struct wined3d_surface * CDECL wined3d_surface_from_resource(struct wined3d_resource *resource)
3825 {
3826     return surface_from_resource(resource);
3827 }
3828
3829 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3830 {
3831     TRACE("surface %p.\n", surface);
3832
3833     if (!surface->resource.map_count)
3834     {
3835         WARN("Trying to unmap unmapped surface.\n");
3836         return WINEDDERR_NOTLOCKED;
3837     }
3838     --surface->resource.map_count;
3839
3840     surface->surface_ops->surface_unmap(surface);
3841
3842     return WINED3D_OK;
3843 }
3844
3845 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3846         struct wined3d_map_desc *map_desc, const RECT *rect, DWORD flags)
3847 {
3848     const struct wined3d_format *format = surface->resource.format;
3849
3850     TRACE("surface %p, map_desc %p, rect %s, flags %#x.\n",
3851             surface, map_desc, wine_dbgstr_rect(rect), flags);
3852
3853     if (surface->resource.map_count)
3854     {
3855         WARN("Surface is already mapped.\n");
3856         return WINED3DERR_INVALIDCALL;
3857     }
3858
3859     if ((format->flags & WINED3DFMT_FLAG_BLOCKS) && rect
3860             && !surface_check_block_align(surface, rect))
3861     {
3862         WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3863                 wine_dbgstr_rect(rect), format->block_width, format->block_height);
3864
3865         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3866             return WINED3DERR_INVALIDCALL;
3867     }
3868
3869     ++surface->resource.map_count;
3870
3871     if (!(surface->flags & SFLAG_LOCKABLE))
3872         WARN("Trying to lock unlockable surface.\n");
3873
3874     /* Performance optimization: Count how often a surface is mapped, if it is
3875      * mapped regularly do not throw away the system memory copy. This avoids
3876      * the need to download the surface from OpenGL all the time. The surface
3877      * is still downloaded if the OpenGL texture is changed. */
3878     if (!(surface->flags & SFLAG_DYNLOCK))
3879     {
3880         if (++surface->lockCount > MAXLOCKCOUNT)
3881         {
3882             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3883             surface->flags |= SFLAG_DYNLOCK;
3884         }
3885     }
3886
3887     surface->surface_ops->surface_map(surface, rect, flags);
3888
3889     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3890         map_desc->row_pitch = surface->resource.width * format->byte_count;
3891     else
3892         map_desc->row_pitch = wined3d_surface_get_pitch(surface);
3893     map_desc->slice_pitch = 0;
3894
3895     if (!rect)
3896     {
3897         map_desc->data = surface->resource.allocatedMemory;
3898         surface->lockedRect.left = 0;
3899         surface->lockedRect.top = 0;
3900         surface->lockedRect.right = surface->resource.width;
3901         surface->lockedRect.bottom = surface->resource.height;
3902     }
3903     else
3904     {
3905         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3906         {
3907             /* Compressed textures are block based, so calculate the offset of
3908              * the block that contains the top-left pixel of the locked rectangle. */
3909             map_desc->data = surface->resource.allocatedMemory
3910                     + ((rect->top / format->block_height) * map_desc->row_pitch)
3911                     + ((rect->left / format->block_width) * format->block_byte_count);
3912         }
3913         else
3914         {
3915             map_desc->data = surface->resource.allocatedMemory
3916                     + (map_desc->row_pitch * rect->top)
3917                     + (rect->left * format->byte_count);
3918         }
3919         surface->lockedRect.left = rect->left;
3920         surface->lockedRect.top = rect->top;
3921         surface->lockedRect.right = rect->right;
3922         surface->lockedRect.bottom = rect->bottom;
3923     }
3924
3925     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3926     TRACE("Returning memory %p, pitch %u.\n", map_desc->data, map_desc->row_pitch);
3927
3928     return WINED3D_OK;
3929 }
3930
3931 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3932 {
3933     struct wined3d_map_desc map;
3934     HRESULT hr;
3935
3936     TRACE("surface %p, dc %p.\n", surface, dc);
3937
3938     if (surface->flags & SFLAG_USERPTR)
3939     {
3940         ERR("Not supported on surfaces with application-provided memory.\n");
3941         return WINEDDERR_NODC;
3942     }
3943
3944     /* Give more detailed info for ddraw. */
3945     if (surface->flags & SFLAG_DCINUSE)
3946         return WINEDDERR_DCALREADYCREATED;
3947
3948     /* Can't GetDC if the surface is locked. */
3949     if (surface->resource.map_count)
3950         return WINED3DERR_INVALIDCALL;
3951
3952     /* Create a DIB section if there isn't a dc yet. */
3953     if (!surface->hDC)
3954     {
3955         if (surface->flags & SFLAG_CLIENT)
3956         {
3957             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3958             surface_release_client_storage(surface);
3959         }
3960         hr = surface_create_dib_section(surface);
3961         if (FAILED(hr))
3962             return WINED3DERR_INVALIDCALL;
3963
3964         /* Use the DIB section from now on if we are not using a PBO. */
3965         if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
3966         {
3967             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3968             surface->resource.heapMemory = NULL;
3969             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3970         }
3971     }
3972
3973     /* Map the surface. */
3974     hr = wined3d_surface_map(surface, &map, NULL, 0);
3975     if (FAILED(hr))
3976     {
3977         ERR("Map failed, hr %#x.\n", hr);
3978         return hr;
3979     }
3980
3981     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3982      * activates the allocatedMemory. */
3983     if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
3984         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3985
3986     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3987             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3988     {
3989         /* GetDC on palettized formats is unsupported in D3D9, and the method
3990          * is missing in D3D8, so this should only be used for DX <=7
3991          * surfaces (with non-device palettes). */
3992         const PALETTEENTRY *pal = NULL;
3993
3994         if (surface->palette)
3995         {
3996             pal = surface->palette->palents;
3997         }
3998         else
3999         {
4000             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
4001             struct wined3d_surface *dds_primary = swapchain->front_buffer;
4002
4003             if (dds_primary && dds_primary->palette)
4004                 pal = dds_primary->palette->palents;
4005         }
4006
4007         if (pal)
4008         {
4009             RGBQUAD col[256];
4010             unsigned int i;
4011
4012             for (i = 0; i < 256; ++i)
4013             {
4014                 col[i].rgbRed = pal[i].peRed;
4015                 col[i].rgbGreen = pal[i].peGreen;
4016                 col[i].rgbBlue = pal[i].peBlue;
4017                 col[i].rgbReserved = 0;
4018             }
4019             SetDIBColorTable(surface->hDC, 0, 256, col);
4020         }
4021     }
4022
4023     surface->flags |= SFLAG_DCINUSE;
4024
4025     *dc = surface->hDC;
4026     TRACE("Returning dc %p.\n", *dc);
4027
4028     return WINED3D_OK;
4029 }
4030
4031 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
4032 {
4033     TRACE("surface %p, dc %p.\n", surface, dc);
4034
4035     if (!(surface->flags & SFLAG_DCINUSE))
4036         return WINEDDERR_NODC;
4037
4038     if (surface->hDC != dc)
4039     {
4040         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
4041                 dc, surface->hDC);
4042         return WINEDDERR_NODC;
4043     }
4044
4045     /* Copy the contents of the DIB over to the PBO. */
4046     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
4047         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
4048
4049     /* We locked first, so unlock now. */
4050     wined3d_surface_unmap(surface);
4051
4052     surface->flags &= ~SFLAG_DCINUSE;
4053
4054     return WINED3D_OK;
4055 }
4056
4057 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
4058 {
4059     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
4060
4061     if (flags)
4062     {
4063         static UINT once;
4064         if (!once++)
4065             FIXME("Ignoring flags %#x.\n", flags);
4066         else
4067             WARN("Ignoring flags %#x.\n", flags);
4068     }
4069
4070     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4071     {
4072         ERR("Not supported on swapchain surfaces.\n");
4073         return WINEDDERR_NOTFLIPPABLE;
4074     }
4075
4076     /* Flipping is only supported on render targets and overlays. */
4077     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
4078     {
4079         WARN("Tried to flip a non-render target, non-overlay surface.\n");
4080         return WINEDDERR_NOTFLIPPABLE;
4081     }
4082
4083     flip_surface(surface, override);
4084
4085     /* Update overlays if they're visible. */
4086     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
4087         return surface_draw_overlay(surface);
4088
4089     return WINED3D_OK;
4090 }
4091
4092 /* Do not call while under the GL lock. */
4093 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
4094 {
4095     struct wined3d_device *device = surface->resource.device;
4096
4097     TRACE("iface %p, srgb %#x.\n", surface, srgb);
4098
4099     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4100     {
4101         struct wined3d_texture *texture = surface->container.u.texture;
4102
4103         TRACE("Passing to container (%p).\n", texture);
4104         texture->texture_ops->texture_preload(texture, srgb);
4105     }
4106     else
4107     {
4108         struct wined3d_context *context;
4109
4110         TRACE("(%p) : About to load surface\n", surface);
4111
4112         /* TODO: Use already acquired context when possible. */
4113         context = context_acquire(device, NULL);
4114
4115         surface_load(surface, srgb == SRGB_SRGB);
4116
4117         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
4118         {
4119             /* Tell opengl to try and keep this texture in video ram (well mostly) */
4120             GLclampf tmp;
4121             tmp = 0.9f;
4122             context->gl_info->gl_ops.gl.p_glPrioritizeTextures(1, &surface->texture_name, &tmp);
4123         }
4124
4125         context_release(context);
4126     }
4127 }
4128
4129 /* Read the framebuffer back into the surface */
4130 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4131 {
4132     struct wined3d_device *device = surface->resource.device;
4133     const struct wined3d_gl_info *gl_info;
4134     struct wined3d_context *context;
4135     BYTE *mem;
4136     GLint fmt;
4137     GLint type;
4138     BYTE *row, *top, *bottom;
4139     int i;
4140     BOOL bpp;
4141     RECT local_rect;
4142     BOOL srcIsUpsideDown;
4143     GLint rowLen = 0;
4144     GLint skipPix = 0;
4145     GLint skipRow = 0;
4146
4147     context = context_acquire(device, surface);
4148     context_apply_blit_state(context, device);
4149     gl_info = context->gl_info;
4150
4151     /* Select the correct read buffer, and give some debug output.
4152      * There is no need to keep track of the current read buffer or reset it, every part of the code
4153      * that reads sets the read buffer as desired.
4154      */
4155     if (surface_is_offscreen(surface))
4156     {
4157         /* Mapping the primary render target which is not on a swapchain.
4158          * Read from the back buffer. */
4159         TRACE("Mapping offscreen render target.\n");
4160         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4161         srcIsUpsideDown = TRUE;
4162     }
4163     else
4164     {
4165         /* Onscreen surfaces are always part of a swapchain */
4166         GLenum buffer = surface_get_gl_buffer(surface);
4167         TRACE("Mapping %#x buffer.\n", buffer);
4168         gl_info->gl_ops.gl.p_glReadBuffer(buffer);
4169         checkGLcall("glReadBuffer");
4170         srcIsUpsideDown = FALSE;
4171     }
4172
4173     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4174     if (!rect)
4175     {
4176         local_rect.left = 0;
4177         local_rect.top = 0;
4178         local_rect.right = surface->resource.width;
4179         local_rect.bottom = surface->resource.height;
4180     }
4181     else
4182     {
4183         local_rect = *rect;
4184     }
4185     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4186
4187     switch (surface->resource.format->id)
4188     {
4189         case WINED3DFMT_P8_UINT:
4190         {
4191             if (primary_render_target_is_p8(device))
4192             {
4193                 /* In case of P8 render targets the index is stored in the alpha component */
4194                 fmt = GL_ALPHA;
4195                 type = GL_UNSIGNED_BYTE;
4196                 mem = dest;
4197                 bpp = surface->resource.format->byte_count;
4198             }
4199             else
4200             {
4201                 /* GL can't return palettized data, so read ARGB pixels into a
4202                  * separate block of memory and convert them into palettized format
4203                  * in software. Slow, but if the app means to use palettized render
4204                  * targets and locks it...
4205                  *
4206                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4207                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4208                  * for the color channels when palettizing the colors.
4209                  */
4210                 fmt = GL_RGB;
4211                 type = GL_UNSIGNED_BYTE;
4212                 pitch *= 3;
4213                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4214                 if (!mem)
4215                 {
4216                     ERR("Out of memory\n");
4217                     return;
4218                 }
4219                 bpp = surface->resource.format->byte_count * 3;
4220             }
4221         }
4222         break;
4223
4224         default:
4225             mem = dest;
4226             fmt = surface->resource.format->glFormat;
4227             type = surface->resource.format->glType;
4228             bpp = surface->resource.format->byte_count;
4229     }
4230
4231     if (surface->flags & SFLAG_PBO)
4232     {
4233         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4234         checkGLcall("glBindBufferARB");
4235         if (mem)
4236         {
4237             ERR("mem not null for pbo -- unexpected\n");
4238             mem = NULL;
4239         }
4240     }
4241
4242     /* Save old pixel store pack state */
4243     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4244     checkGLcall("glGetIntegerv");
4245     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4246     checkGLcall("glGetIntegerv");
4247     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4248     checkGLcall("glGetIntegerv");
4249
4250     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4251     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4252     checkGLcall("glPixelStorei");
4253     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4254     checkGLcall("glPixelStorei");
4255     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4256     checkGLcall("glPixelStorei");
4257
4258     gl_info->gl_ops.gl.p_glReadPixels(local_rect.left,
4259             !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4260             local_rect.right - local_rect.left,
4261             local_rect.bottom - local_rect.top,
4262             fmt, type, mem);
4263     checkGLcall("glReadPixels");
4264
4265     /* Reset previous pixel store pack state */
4266     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4267     checkGLcall("glPixelStorei");
4268     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4269     checkGLcall("glPixelStorei");
4270     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4271     checkGLcall("glPixelStorei");
4272
4273     if (surface->flags & SFLAG_PBO)
4274     {
4275         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4276         checkGLcall("glBindBufferARB");
4277
4278         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4279          * to get a pointer to it and perform the flipping in software. This is a lot
4280          * faster than calling glReadPixels for each line. In case we want more speed
4281          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4282         if (!srcIsUpsideDown)
4283         {
4284             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4285             checkGLcall("glBindBufferARB");
4286
4287             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4288             checkGLcall("glMapBufferARB");
4289         }
4290     }
4291
4292     /* TODO: Merge this with the palettization loop below for P8 targets */
4293     if(!srcIsUpsideDown) {
4294         UINT len, off;
4295         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4296             Flip the lines in software */
4297         len = (local_rect.right - local_rect.left) * bpp;
4298         off = local_rect.left * bpp;
4299
4300         row = HeapAlloc(GetProcessHeap(), 0, len);
4301         if(!row) {
4302             ERR("Out of memory\n");
4303             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4304                 HeapFree(GetProcessHeap(), 0, mem);
4305             return;
4306         }
4307
4308         top = mem + pitch * local_rect.top;
4309         bottom = mem + pitch * (local_rect.bottom - 1);
4310         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4311             memcpy(row, top + off, len);
4312             memcpy(top + off, bottom + off, len);
4313             memcpy(bottom + off, row, len);
4314             top += pitch;
4315             bottom -= pitch;
4316         }
4317         HeapFree(GetProcessHeap(), 0, row);
4318
4319         /* Unmap the temp PBO buffer */
4320         if (surface->flags & SFLAG_PBO)
4321         {
4322             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4323             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4324         }
4325     }
4326
4327     context_release(context);
4328
4329     /* For P8 textures we need to perform an inverse palette lookup. This is
4330      * done by searching for a palette index which matches the RGB value.
4331      * Note this isn't guaranteed to work when there are multiple entries for
4332      * the same color but we have no choice. In case of P8 render targets,
4333      * the index is stored in the alpha component so no conversion is needed. */
4334     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4335     {
4336         const PALETTEENTRY *pal = NULL;
4337         DWORD width = pitch / 3;
4338         int x, y, c;
4339
4340         if (surface->palette)
4341         {
4342             pal = surface->palette->palents;
4343         }
4344         else
4345         {
4346             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4347             HeapFree(GetProcessHeap(), 0, mem);
4348             return;
4349         }
4350
4351         for(y = local_rect.top; y < local_rect.bottom; y++) {
4352             for(x = local_rect.left; x < local_rect.right; x++) {
4353                 /*                      start              lines            pixels      */
4354                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4355                 const BYTE *green = blue  + 1;
4356                 const BYTE *red = green + 1;
4357
4358                 for(c = 0; c < 256; c++) {
4359                     if(*red   == pal[c].peRed   &&
4360                        *green == pal[c].peGreen &&
4361                        *blue  == pal[c].peBlue)
4362                     {
4363                         *((BYTE *) dest + y * width + x) = c;
4364                         break;
4365                     }
4366                 }
4367             }
4368         }
4369         HeapFree(GetProcessHeap(), 0, mem);
4370     }
4371 }
4372
4373 /* Read the framebuffer contents into a texture. Note that this function
4374  * doesn't do any kind of flipping. Using this on an onscreen surface will
4375  * result in a flipped D3D texture. */
4376 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4377 {
4378     struct wined3d_device *device = surface->resource.device;
4379     const struct wined3d_gl_info *gl_info;
4380     struct wined3d_context *context;
4381
4382     context = context_acquire(device, surface);
4383     gl_info = context->gl_info;
4384     device_invalidate_state(device, STATE_FRAMEBUFFER);
4385
4386     surface_prepare_texture(surface, context, srgb);
4387     surface_bind_and_dirtify(surface, context, srgb);
4388
4389     TRACE("Reading back offscreen render target %p.\n", surface);
4390
4391     if (surface_is_offscreen(surface))
4392         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4393     else
4394         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(surface));
4395     checkGLcall("glReadBuffer");
4396
4397     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4398             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4399     checkGLcall("glCopyTexSubImage2D");
4400
4401     context_release(context);
4402 }
4403
4404 /* Context activation is done by the caller. */
4405 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4406         struct wined3d_context *context, BOOL srgb)
4407 {
4408     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4409     enum wined3d_conversion_type convert;
4410     struct wined3d_format format;
4411
4412     if (surface->flags & alloc_flag) return;
4413
4414     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4415     if (convert != WINED3D_CT_NONE || format.convert)
4416         surface->flags |= SFLAG_CONVERTED;
4417     else surface->flags &= ~SFLAG_CONVERTED;
4418
4419     surface_bind_and_dirtify(surface, context, srgb);
4420     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4421     surface->flags |= alloc_flag;
4422 }
4423
4424 /* Context activation is done by the caller. */
4425 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4426 {
4427     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4428     {
4429         struct wined3d_texture *texture = surface->container.u.texture;
4430         UINT sub_count = texture->level_count * texture->layer_count;
4431         UINT i;
4432
4433         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4434
4435         for (i = 0; i < sub_count; ++i)
4436         {
4437             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4438             surface_prepare_texture_internal(s, context, srgb);
4439         }
4440
4441         return;
4442     }
4443
4444     surface_prepare_texture_internal(surface, context, srgb);
4445 }
4446
4447 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4448 {
4449     if (multisample)
4450     {
4451         if (surface->rb_multisample)
4452             return;
4453
4454         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4455         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4456         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4457                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4458         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4459     }
4460     else
4461     {
4462         if (surface->rb_resolved)
4463             return;
4464
4465         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4466         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4467         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4468                 surface->pow2Width, surface->pow2Height);
4469         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4470     }
4471 }
4472
4473 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4474         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4475 {
4476     struct wined3d_device *device = surface->resource.device;
4477     UINT pitch = wined3d_surface_get_pitch(surface);
4478     const struct wined3d_gl_info *gl_info;
4479     struct wined3d_context *context;
4480     RECT local_rect;
4481     UINT w, h;
4482
4483     surface_get_rect(surface, rect, &local_rect);
4484
4485     mem += local_rect.top * pitch + local_rect.left * bpp;
4486     w = local_rect.right - local_rect.left;
4487     h = local_rect.bottom - local_rect.top;
4488
4489     /* Activate the correct context for the render target */
4490     context = context_acquire(device, surface);
4491     context_apply_blit_state(context, device);
4492     gl_info = context->gl_info;
4493
4494     if (!surface_is_offscreen(surface))
4495     {
4496         GLenum buffer = surface_get_gl_buffer(surface);
4497         TRACE("Unlocking %#x buffer.\n", buffer);
4498         context_set_draw_buffer(context, buffer);
4499
4500         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4501         gl_info->gl_ops.gl.p_glPixelZoom(1.0f, -1.0f);
4502     }
4503     else
4504     {
4505         /* Primary offscreen render target */
4506         TRACE("Offscreen render target.\n");
4507         context_set_draw_buffer(context, device->offscreenBuffer);
4508
4509         gl_info->gl_ops.gl.p_glPixelZoom(1.0f, 1.0f);
4510     }
4511
4512     gl_info->gl_ops.gl.p_glRasterPos3i(local_rect.left, local_rect.top, 1);
4513     checkGLcall("glRasterPos3i");
4514
4515     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4516     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4517
4518     if (surface->flags & SFLAG_PBO)
4519     {
4520         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4521         checkGLcall("glBindBufferARB");
4522     }
4523
4524     gl_info->gl_ops.gl.p_glDrawPixels(w, h, fmt, type, mem);
4525     checkGLcall("glDrawPixels");
4526
4527     if (surface->flags & SFLAG_PBO)
4528     {
4529         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4530         checkGLcall("glBindBufferARB");
4531     }
4532
4533     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4534     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4535
4536     if (wined3d_settings.strict_draw_ordering
4537             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4538             && surface->container.u.swapchain->front_buffer == surface))
4539         gl_info->gl_ops.gl.p_glFlush();
4540
4541     context_release(context);
4542 }
4543
4544 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4545 {
4546     /* FIXME: Is this really how color keys are supposed to work? I think it
4547      * makes more sense to compare the individual channels. */
4548     return color >= color_key->color_space_low_value
4549             && color <= color_key->color_space_high_value;
4550 }
4551
4552 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4553 {
4554     const struct wined3d_device *device = surface->resource.device;
4555     const struct wined3d_palette *pal = surface->palette;
4556     BOOL index_in_alpha = FALSE;
4557     unsigned int i;
4558
4559     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4560      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4561      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4562      * duplicate entries. Store the color key in the unused alpha component to speed the
4563      * download up and to make conversion unneeded. */
4564     index_in_alpha = primary_render_target_is_p8(device);
4565
4566     if (!pal)
4567     {
4568         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4569         if (index_in_alpha)
4570         {
4571             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4572              * there's no palette at this time. */
4573             for (i = 0; i < 256; i++) table[i][3] = i;
4574         }
4575     }
4576     else
4577     {
4578         TRACE("Using surface palette %p\n", pal);
4579         /* Get the surface's palette */
4580         for (i = 0; i < 256; ++i)
4581         {
4582             table[i][0] = pal->palents[i].peRed;
4583             table[i][1] = pal->palents[i].peGreen;
4584             table[i][2] = pal->palents[i].peBlue;
4585
4586             /* When index_in_alpha is set the palette index is stored in the
4587              * alpha component. In case of a readback we can then read
4588              * GL_ALPHA. Color keying is handled in BltOverride using a
4589              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4590              * color key itself is passed to glAlphaFunc in other cases the
4591              * alpha component of pixels that should be masked away is set to 0. */
4592             if (index_in_alpha)
4593                 table[i][3] = i;
4594             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4595                 table[i][3] = 0x00;
4596             else if (pal->flags & WINEDDPCAPS_ALPHA)
4597                 table[i][3] = pal->palents[i].peFlags;
4598             else
4599                 table[i][3] = 0xff;
4600         }
4601     }
4602 }
4603
4604 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width, UINT height,
4605         UINT outpitch, enum wined3d_conversion_type conversion_type, struct wined3d_surface *surface)
4606 {
4607     const BYTE *source;
4608     BYTE *dest;
4609
4610     TRACE("src %p, dst %p, pitch %u, width %u, height %u, outpitch %u, conversion_type %#x, surface %p.\n",
4611             src, dst, pitch, width, height, outpitch, conversion_type, surface);
4612
4613     switch (conversion_type)
4614     {
4615         case WINED3D_CT_NONE:
4616         {
4617             memcpy(dst, src, pitch * height);
4618             break;
4619         }
4620
4621         case WINED3D_CT_PALETTED:
4622         case WINED3D_CT_PALETTED_CK:
4623         {
4624             BYTE table[256][4];
4625             unsigned int x, y;
4626
4627             d3dfmt_p8_init_palette(surface, table, (conversion_type == WINED3D_CT_PALETTED_CK));
4628
4629             for (y = 0; y < height; y++)
4630             {
4631                 source = src + pitch * y;
4632                 dest = dst + outpitch * y;
4633                 /* This is an 1 bpp format, using the width here is fine */
4634                 for (x = 0; x < width; x++) {
4635                     BYTE color = *source++;
4636                     *dest++ = table[color][0];
4637                     *dest++ = table[color][1];
4638                     *dest++ = table[color][2];
4639                     *dest++ = table[color][3];
4640                 }
4641             }
4642         }
4643         break;
4644
4645         case WINED3D_CT_CK_565:
4646         {
4647             /* Converting the 565 format in 5551 packed to emulate color-keying.
4648
4649               Note : in all these conversion, it would be best to average the averaging
4650                       pixels to get the color of the pixel that will be color-keyed to
4651                       prevent 'color bleeding'. This will be done later on if ever it is
4652                       too visible.
4653
4654               Note2: Nvidia documents say that their driver does not support alpha + color keying
4655                      on the same surface and disables color keying in such a case
4656             */
4657             unsigned int x, y;
4658             const WORD *Source;
4659             WORD *Dest;
4660
4661             TRACE("Color keyed 565\n");
4662
4663             for (y = 0; y < height; y++) {
4664                 Source = (const WORD *)(src + y * pitch);
4665                 Dest = (WORD *) (dst + y * outpitch);
4666                 for (x = 0; x < width; x++ ) {
4667                     WORD color = *Source++;
4668                     *Dest = ((color & 0xffc0) | ((color & 0x1f) << 1));
4669                     if (!color_in_range(&surface->src_blt_color_key, color))
4670                         *Dest |= 0x0001;
4671                     Dest++;
4672                 }
4673             }
4674         }
4675         break;
4676
4677         case WINED3D_CT_CK_5551:
4678         {
4679             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4680             unsigned int x, y;
4681             const WORD *Source;
4682             WORD *Dest;
4683             TRACE("Color keyed 5551\n");
4684             for (y = 0; y < height; y++) {
4685                 Source = (const WORD *)(src + y * pitch);
4686                 Dest = (WORD *) (dst + y * outpitch);
4687                 for (x = 0; x < width; x++ ) {
4688                     WORD color = *Source++;
4689                     *Dest = color;
4690                     if (!color_in_range(&surface->src_blt_color_key, color))
4691                         *Dest |= (1 << 15);
4692                     else
4693                         *Dest &= ~(1 << 15);
4694                     Dest++;
4695                 }
4696             }
4697         }
4698         break;
4699
4700         case WINED3D_CT_CK_RGB24:
4701         {
4702             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4703             unsigned int x, y;
4704             for (y = 0; y < height; y++)
4705             {
4706                 source = src + pitch * y;
4707                 dest = dst + outpitch * y;
4708                 for (x = 0; x < width; x++) {
4709                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4710                     DWORD dstcolor = color << 8;
4711                     if (!color_in_range(&surface->src_blt_color_key, color))
4712                         dstcolor |= 0xff;
4713                     *(DWORD*)dest = dstcolor;
4714                     source += 3;
4715                     dest += 4;
4716                 }
4717             }
4718         }
4719         break;
4720
4721         case WINED3D_CT_RGB32_888:
4722         {
4723             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4724             unsigned int x, y;
4725             for (y = 0; y < height; y++)
4726             {
4727                 source = src + pitch * y;
4728                 dest = dst + outpitch * y;
4729                 for (x = 0; x < width; x++) {
4730                     DWORD color = 0xffffff & *(const DWORD*)source;
4731                     DWORD dstcolor = color << 8;
4732                     if (!color_in_range(&surface->src_blt_color_key, color))
4733                         dstcolor |= 0xff;
4734                     *(DWORD*)dest = dstcolor;
4735                     source += 4;
4736                     dest += 4;
4737                 }
4738             }
4739         }
4740         break;
4741
4742         case WINED3D_CT_CK_ARGB32:
4743         {
4744             unsigned int x, y;
4745             for (y = 0; y < height; ++y)
4746             {
4747                 source = src + pitch * y;
4748                 dest = dst + outpitch * y;
4749                 for (x = 0; x < width; ++x)
4750                 {
4751                     DWORD color = *(const DWORD *)source;
4752                     if (color_in_range(&surface->src_blt_color_key, color))
4753                         color &= ~0xff000000;
4754                     *(DWORD*)dest = color;
4755                     source += 4;
4756                     dest += 4;
4757                 }
4758             }
4759         }
4760         break;
4761
4762         default:
4763             ERR("Unsupported conversion type %#x.\n", conversion_type);
4764     }
4765     return WINED3D_OK;
4766 }
4767
4768 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4769 {
4770     /* Flip the surface contents */
4771     /* Flip the DC */
4772     {
4773         HDC tmp;
4774         tmp = front->hDC;
4775         front->hDC = back->hDC;
4776         back->hDC = tmp;
4777     }
4778
4779     /* Flip the DIBsection */
4780     {
4781         HBITMAP tmp = front->dib.DIBsection;
4782         front->dib.DIBsection = back->dib.DIBsection;
4783         back->dib.DIBsection = tmp;
4784     }
4785
4786     /* Flip the surface data */
4787     {
4788         void* tmp;
4789
4790         tmp = front->dib.bitmap_data;
4791         front->dib.bitmap_data = back->dib.bitmap_data;
4792         back->dib.bitmap_data = tmp;
4793
4794         tmp = front->resource.allocatedMemory;
4795         front->resource.allocatedMemory = back->resource.allocatedMemory;
4796         back->resource.allocatedMemory = tmp;
4797
4798         tmp = front->resource.heapMemory;
4799         front->resource.heapMemory = back->resource.heapMemory;
4800         back->resource.heapMemory = tmp;
4801     }
4802
4803     /* Flip the PBO */
4804     {
4805         GLuint tmp_pbo = front->pbo;
4806         front->pbo = back->pbo;
4807         back->pbo = tmp_pbo;
4808     }
4809
4810     /* Flip the opengl texture */
4811     {
4812         GLuint tmp;
4813
4814         tmp = back->texture_name;
4815         back->texture_name = front->texture_name;
4816         front->texture_name = tmp;
4817
4818         tmp = back->texture_name_srgb;
4819         back->texture_name_srgb = front->texture_name_srgb;
4820         front->texture_name_srgb = tmp;
4821
4822         tmp = back->rb_multisample;
4823         back->rb_multisample = front->rb_multisample;
4824         front->rb_multisample = tmp;
4825
4826         tmp = back->rb_resolved;
4827         back->rb_resolved = front->rb_resolved;
4828         front->rb_resolved = tmp;
4829
4830         resource_unload(&back->resource);
4831         resource_unload(&front->resource);
4832     }
4833
4834     {
4835         DWORD tmp_flags = back->flags;
4836         back->flags = front->flags;
4837         front->flags = tmp_flags;
4838     }
4839 }
4840
4841 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4842  * pixel copy calls. */
4843 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4844         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4845 {
4846     struct wined3d_device *device = dst_surface->resource.device;
4847     const struct wined3d_gl_info *gl_info;
4848     float xrel, yrel;
4849     struct wined3d_context *context;
4850     BOOL upsidedown = FALSE;
4851     RECT dst_rect = *dst_rect_in;
4852     GLenum dst_target;
4853
4854     if (dst_surface->container.type == WINED3D_CONTAINER_TEXTURE)
4855         dst_target = dst_surface->container.u.texture->target;
4856     else
4857         dst_target = dst_surface->texture_target;
4858
4859     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4860      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4861      */
4862     if(dst_rect.top > dst_rect.bottom) {
4863         UINT tmp = dst_rect.bottom;
4864         dst_rect.bottom = dst_rect.top;
4865         dst_rect.top = tmp;
4866         upsidedown = TRUE;
4867     }
4868
4869     context = context_acquire(device, src_surface);
4870     gl_info = context->gl_info;
4871     context_apply_blit_state(context, device);
4872     surface_internal_preload(dst_surface, SRGB_RGB);
4873
4874     /* Bind the target texture */
4875     context_bind_texture(context, dst_target, dst_surface->texture_name);
4876     if (surface_is_offscreen(src_surface))
4877     {
4878         TRACE("Reading from an offscreen target\n");
4879         upsidedown = !upsidedown;
4880         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4881     }
4882     else
4883     {
4884         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(src_surface));
4885     }
4886     checkGLcall("glReadBuffer");
4887
4888     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4889     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4890
4891     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4892     {
4893         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4894
4895         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4896             ERR("Texture filtering not supported in direct blit.\n");
4897     }
4898     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4899             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4900     {
4901         ERR("Texture filtering not supported in direct blit\n");
4902     }
4903
4904     if (upsidedown
4905             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4906             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4907     {
4908         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do. */
4909         gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4910                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4911                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4912                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4913     }
4914     else
4915     {
4916         LONG row;
4917         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4918         /* I have to process this row by row to swap the image,
4919          * otherwise it would be upside down, so stretching in y direction
4920          * doesn't cost extra time
4921          *
4922          * However, stretching in x direction can be avoided if not necessary
4923          */
4924         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4925             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4926             {
4927                 /* Well, that stuff works, but it's very slow.
4928                  * find a better way instead
4929                  */
4930                 LONG col;
4931
4932                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4933                 {
4934                     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4935                             dst_rect.left + col /* x offset */, row /* y offset */,
4936                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4937                 }
4938             }
4939             else
4940             {
4941                 gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4942                         dst_rect.left /* x offset */, row /* y offset */,
4943                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4944             }
4945         }
4946     }
4947     checkGLcall("glCopyTexSubImage2D");
4948
4949     context_release(context);
4950
4951     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4952      * path is never entered
4953      */
4954     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4955 }
4956
4957 /* Uses the hardware to stretch and flip the image */
/* Copies src_rect of the frame buffer behind src_surface into dst_rect_in of
 * dst_surface's GL texture, letting GL do the scaling.
 *
 * Outline of what the code below does:
 *  1. Copy the whole frame buffer (fbwidth x fbheight) into a texture.
 *  2. Draw that texture as a quad of the destination size at the origin of a
 *     scratch draw buffer (GL_AUX1, GL_AUX0 or GL_BACK).
 *  3. Read the quad back into the destination texture with
 *     glCopyTexSubImage2D.
 *  4. If GL_BACK served as scratch buffer, redraw the texture from step 1 over
 *     it to restore its previous contents.
 *
 * dst_surface: receives the pixels and is flagged SFLAG_INTEXTURE at the end.
 * src_surface: surface whose GL read buffer is the source.
 * src_rect:    source rectangle.
 * dst_rect_in: destination rectangle; top > bottom requests a vertical flip.
 * filter:      translated through magLookup/minMipLookup into the GL filters
 *              set on the texture from step 1. */
4958 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4959         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4960 {
4961     struct wined3d_device *device = dst_surface->resource.device;
4962     struct wined3d_swapchain *src_swapchain = NULL;
4963     GLuint src, backup = 0;
4964     float left, right, top, bottom; /* Texture coordinates */
4965     UINT fbwidth = src_surface->resource.width;
4966     UINT fbheight = src_surface->resource.height;
4967     const struct wined3d_gl_info *gl_info;
4968     struct wined3d_context *context;
4969     GLenum drawBuffer = GL_BACK;
4970     GLenum texture_target;
4971     BOOL noBackBufferBackup;
4972     BOOL src_offscreen;
4973     BOOL upsidedown = FALSE;
4974     RECT dst_rect = *dst_rect_in;
4975
4976     TRACE("Using hwstretch blit\n");
4977     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4978     context = context_acquire(device, src_surface);
4979     gl_info = context->gl_info;
4980     context_apply_blit_state(context, device);
4981     surface_internal_preload(dst_surface, SRGB_RGB);
4982
         /* With an offscreen source in FBO mode a temporary "backup" texture is
          * used below; in every other case the source surface's own texture
          * holds the frame buffer copy, so it needs a GL texture name. */
4983     src_offscreen = surface_is_offscreen(src_surface);
4984     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4985     if (!noBackBufferBackup && !src_surface->texture_name)
4986     {
4987         /* Get it a description */
4988         surface_internal_preload(src_surface, SRGB_RGB);
4989     }
4990
4991     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4992      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4993      */
4994     if (context->aux_buffers >= 2)
4995     {
4996         /* Got more than one aux buffer? Use the 2nd aux buffer */
4997         drawBuffer = GL_AUX1;
4998     }
4999     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
5000     {
5001         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5002         drawBuffer = GL_AUX0;
5003     }
5004
5005     if (noBackBufferBackup)
5006     {
             /* NOTE(review): no glTexImage2D call for "backup" is visible in
              * this function before the glCopyTexSubImage2D further down;
              * confirm that storage for it gets defined. */
5007         gl_info->gl_ops.gl.p_glGenTextures(1, &backup);
5008         checkGLcall("glGenTextures");
5009         context_bind_texture(context, GL_TEXTURE_2D, backup);
5010         texture_target = GL_TEXTURE_2D;
5011     }
5012     else
5013     {
5014         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5015          * we are reading from the back buffer, the backup can be used as source texture
5016          */
5017         texture_target = src_surface->texture_target;
5018         context_bind_texture(context, texture_target, src_surface->texture_name);
5019         gl_info->gl_ops.gl.p_glEnable(texture_target);
5020         checkGLcall("glEnable(texture_target)");
5021
5022         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5023         src_surface->flags &= ~SFLAG_INTEXTURE;
5024     }
5025
5026     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5027      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5028      */
5029     if(dst_rect.top > dst_rect.bottom) {
5030         UINT tmp = dst_rect.bottom;
5031         dst_rect.bottom = dst_rect.top;
5032         dst_rect.top = tmp;
5033         upsidedown = TRUE;
5034     }
5035
         /* Select the GL buffer to read from; an offscreen source additionally
          * inverts the upside down flag. */
5036     if (src_offscreen)
5037     {
5038         TRACE("Reading from an offscreen target\n");
5039         upsidedown = !upsidedown;
5040         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
5041     }
5042     else
5043     {
5044         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(src_surface));
5045     }
5046
         /* Step 1: copy the complete frame buffer into the texture bound above
          * (either "backup" or the source surface's texture). */
5047     /* TODO: Only back up the part that will be overwritten */
5048     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(texture_target, 0, 0, 0, 0, 0, fbwidth, fbheight);
5049
5050     checkGLcall("glCopyTexSubImage2D");
5051
5052     /* No issue with overriding these - the sampler is dirty due to blit usage */
5053     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5054             wined3d_gl_mag_filter(magLookup, filter));
5055     checkGLcall("glTexParameteri");
5056     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5057             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5058     checkGLcall("glTexParameteri");
5059
         /* Pick the texture to draw from. If the source is not in a swapchain,
          * or is the swapchain's first back buffer, the copy made above is
          * reused. Otherwise GL_FRONT is read into a freshly generated
          * GL_TEXTURE_2D texture with GL_NEAREST filtering (presumably the
          * source is the front buffer in that case - TODO confirm). */
5060     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5061         src_swapchain = src_surface->container.u.swapchain;
5062     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5063     {
5064         src = backup ? backup : src_surface->texture_name;
5065     }
5066     else
5067     {
5068         gl_info->gl_ops.gl.p_glReadBuffer(GL_FRONT);
5069         checkGLcall("glReadBuffer(GL_FRONT)");
5070
5071         gl_info->gl_ops.gl.p_glGenTextures(1, &src);
5072         checkGLcall("glGenTextures(1, &src)");
5073         context_bind_texture(context, GL_TEXTURE_2D, src);
5074
5075         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5076          * out for power of 2 sizes
5077          */
5078         gl_info->gl_ops.gl.p_glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5079                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5080         checkGLcall("glTexImage2D");
5081         gl_info->gl_ops.gl.p_glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, fbwidth, fbheight);
5082
5083         gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5084         checkGLcall("glTexParameteri");
5085         gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5086         checkGLcall("glTexParameteri");
5087
5088         gl_info->gl_ops.gl.p_glReadBuffer(GL_BACK);
5089         checkGLcall("glReadBuffer(GL_BACK)");
5090
             /* The temporary texture is GL_TEXTURE_2D; switch the enabled
              * texture target if the source surface used a different one. */
5091         if (texture_target != GL_TEXTURE_2D)
5092         {
5093             gl_info->gl_ops.gl.p_glDisable(texture_target);
5094             gl_info->gl_ops.gl.p_glEnable(GL_TEXTURE_2D);
5095             texture_target = GL_TEXTURE_2D;
5096         }
5097     }
         /* NOTE(review): the label below mentions glEnd, but no glBegin/glEnd
          * pair has been issued yet at this point in the function. */
5098     checkGLcall("glEnd and previous");
5099
         /* Texture coordinates of the source rectangle. The vertical pair is
          * measured from the bottom of the surface and swapped when the result
          * has to be upside down. */
5100     left = src_rect->left;
5101     right = src_rect->right;
5102
5103     if (!upsidedown)
5104     {
5105         top = src_surface->resource.height - src_rect->top;
5106         bottom = src_surface->resource.height - src_rect->bottom;
5107     }
5108     else
5109     {
5110         top = src_surface->resource.height - src_rect->bottom;
5111         bottom = src_surface->resource.height - src_rect->top;
5112     }
5113
         /* Surfaces flagged SFLAG_NORMCOORD get the coordinates divided by the
          * power-of-two texture size; otherwise pixel values are passed on
          * unchanged. */
5114     if (src_surface->flags & SFLAG_NORMCOORD)
5115     {
5116         left /= src_surface->pow2Width;
5117         right /= src_surface->pow2Width;
5118         top /= src_surface->pow2Height;
5119         bottom /= src_surface->pow2Height;
5120     }
5121
5122     /* draw the source texture stretched and upside down. The correct surface is bound already */
5123     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5124     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5125
         /* Step 2: draw into the scratch buffer and make it the read buffer
          * for the read-back in step 3. */
5126     context_set_draw_buffer(context, drawBuffer);
5127     gl_info->gl_ops.gl.p_glReadBuffer(drawBuffer);
5128
5129     gl_info->gl_ops.gl.p_glBegin(GL_QUADS);
5130         /* bottom left */
5131         gl_info->gl_ops.gl.p_glTexCoord2f(left, bottom);
5132         gl_info->gl_ops.gl.p_glVertex2i(0, 0);
5133
5134         /* top left */
5135         gl_info->gl_ops.gl.p_glTexCoord2f(left, top);
5136         gl_info->gl_ops.gl.p_glVertex2i(0, dst_rect.bottom - dst_rect.top);
5137
5138         /* top right */
5139         gl_info->gl_ops.gl.p_glTexCoord2f(right, top);
5140         gl_info->gl_ops.gl.p_glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5141
5142         /* bottom right */
5143         gl_info->gl_ops.gl.p_glTexCoord2f(right, bottom);
5144         gl_info->gl_ops.gl.p_glVertex2i(dst_rect.right - dst_rect.left, 0);
5145     gl_info->gl_ops.gl.p_glEnd();
5146     checkGLcall("glEnd and previous");
5147
5148     if (texture_target != dst_surface->texture_target)
5149     {
5150         gl_info->gl_ops.gl.p_glDisable(texture_target);
5151         gl_info->gl_ops.gl.p_glEnable(dst_surface->texture_target);
5152         texture_target = dst_surface->texture_target;
5153     }
5154
         /* Step 3. NOTE(review): mip level 0 is hard-coded in the call below
          * rather than dst_surface->texture_level - confirm this is intended. */
5155     /* Now read the stretched and upside down image into the destination texture */
5156     context_bind_texture(context, texture_target, dst_surface->texture_name);
5157     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(texture_target,
5158                         0,
5159                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5160                         0, 0, /* We blitted the image to the origin */
5161                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5162     checkGLcall("glCopyTexSubImage2D");
5163
         /* Step 4: only needed when no aux buffer was available and the quad
          * was drawn over the back buffer. */
5164     if (drawBuffer == GL_BACK)
5165     {
5166         /* Write the back buffer backup back. */
5167         if (backup)
5168         {
5169             if (texture_target != GL_TEXTURE_2D)
5170             {
5171                 gl_info->gl_ops.gl.p_glDisable(texture_target);
5172                 gl_info->gl_ops.gl.p_glEnable(GL_TEXTURE_2D);
5173                 texture_target = GL_TEXTURE_2D;
5174             }
5175             context_bind_texture(context, GL_TEXTURE_2D, backup);
5176         }
5177         else
5178         {
5179             if (texture_target != src_surface->texture_target)
5180             {
5181                 gl_info->gl_ops.gl.p_glDisable(texture_target);
5182                 gl_info->gl_ops.gl.p_glEnable(src_surface->texture_target);
5183                 texture_target = src_surface->texture_target;
5184             }
5185             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5186         }
5187
             /* Full-size quad; texture coordinates are always normalized by
              * the power-of-two size here, independent of SFLAG_NORMCOORD. */
5188         gl_info->gl_ops.gl.p_glBegin(GL_QUADS);
5189             /* top left */
5190             gl_info->gl_ops.gl.p_glTexCoord2f(0.0f, 0.0f);
5191             gl_info->gl_ops.gl.p_glVertex2i(0, fbheight);
5192
5193             /* bottom left */
5194             gl_info->gl_ops.gl.p_glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5195             gl_info->gl_ops.gl.p_glVertex2i(0, 0);
5196
5197             /* bottom right */
5198             gl_info->gl_ops.gl.p_glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5199                     (float)fbheight / (float)src_surface->pow2Height);
5200             gl_info->gl_ops.gl.p_glVertex2i(fbwidth, 0);
5201
5202             /* top right */
5203             gl_info->gl_ops.gl.p_glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5204             gl_info->gl_ops.gl.p_glVertex2i(fbwidth, fbheight);
5205         gl_info->gl_ops.gl.p_glEnd();
5206     }
5207     gl_info->gl_ops.gl.p_glDisable(texture_target);
5208     checkGLcall("glDisable(texture_target)");
5209
         /* Delete the temporary textures generated in this function; "src" is
          * only deleted here when it is neither the source surface's texture
          * nor the backup texture. */
5210     /* Cleanup */
5211     if (src != src_surface->texture_name && src != backup)
5212     {
5213         gl_info->gl_ops.gl.p_glDeleteTextures(1, &src);
5214         checkGLcall("glDeleteTextures(1, &src)");
5215     }
5216     if (backup)
5217     {
5218         gl_info->gl_ops.gl.p_glDeleteTextures(1, &backup);
5219         checkGLcall("glDeleteTextures(1, &backup)");
5220     }
5221
5222     if (wined3d_settings.strict_draw_ordering)
5223         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5224
5225     context_release(context);
5226
5227     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5228      * path is never entered
5229      */
5230     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5231 }
5232
5233 /* Front buffer coordinates are always full screen coordinates, but our GL
5234  * drawable is limited to the window's client area. The sysmem and texture
5235  * copies do have the full screen size. Note that GL has a bottom-left
5236  * origin, while D3D has a top-left origin. */
5237 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5238 {
5239     UINT drawable_height;
5240
5241     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5242             && surface == surface->container.u.swapchain->front_buffer)
5243     {
5244         POINT offset = {0, 0};
5245         RECT windowsize;
5246
5247         ScreenToClient(window, &offset);
5248         OffsetRect(rect, offset.x, offset.y);
5249
5250         GetClientRect(window, &windowsize);
5251         drawable_height = windowsize.bottom - windowsize.top;
5252     }
5253     else
5254     {
5255         drawable_height = surface->resource.height;
5256     }
5257
5258     rect->top = drawable_height - rect->top;
5259     rect->bottom = drawable_height - rect->bottom;
5260 }
5261
5262 static void surface_blt_to_drawable(const struct wined3d_device *device,
5263         enum wined3d_texture_filter_type filter, BOOL color_key,
5264         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5265         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5266 {
5267     const struct wined3d_gl_info *gl_info;
5268     struct wined3d_context *context;
5269     RECT src_rect, dst_rect;
5270
5271     src_rect = *src_rect_in;
5272     dst_rect = *dst_rect_in;
5273
5274     /* Make sure the surface is up-to-date. This should probably use
5275      * surface_load_location() and worry about the destination surface too,
5276      * unless we're overwriting it completely. */
5277     surface_internal_preload(src_surface, SRGB_RGB);
5278
5279     /* Activate the destination context, set it up for blitting */
5280     context = context_acquire(device, dst_surface);
5281     gl_info = context->gl_info;
5282     context_apply_blit_state(context, device);
5283
5284     if (!surface_is_offscreen(dst_surface))
5285         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5286
5287     device->blitter->set_shader(device->blit_priv, context, src_surface);
5288
5289     if (color_key)
5290     {
5291         gl_info->gl_ops.gl.p_glEnable(GL_ALPHA_TEST);
5292         checkGLcall("glEnable(GL_ALPHA_TEST)");
5293
5294         /* When the primary render target uses P8, the alpha component
5295          * contains the palette index. Which means that the colorkey is one of
5296          * the palette entries. In other cases pixels that should be masked
5297          * away have alpha set to 0. */
5298         if (primary_render_target_is_p8(device))
5299             gl_info->gl_ops.gl.p_glAlphaFunc(GL_NOTEQUAL,
5300                     (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5301         else
5302             gl_info->gl_ops.gl.p_glAlphaFunc(GL_NOTEQUAL, 0.0f);
5303         checkGLcall("glAlphaFunc");
5304     }
5305     else
5306     {
5307         gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5308         checkGLcall("glDisable(GL_ALPHA_TEST)");
5309     }
5310
5311     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5312
5313     if (color_key)
5314     {
5315         gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5316         checkGLcall("glDisable(GL_ALPHA_TEST)");
5317     }
5318
5319     /* Leave the opengl state valid for blitting */
5320     device->blitter->unset_shader(context->gl_info);
5321
5322     if (wined3d_settings.strict_draw_ordering
5323             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5324             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5325         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5326
5327     context_release(context);
5328 }
5329
5330 /* Do not call while under the GL lock. */
5331 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5332 {
5333     struct wined3d_device *device = s->resource.device;
5334     const struct blit_shader *blitter;
5335
5336     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5337             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5338     if (!blitter)
5339     {
5340         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5341         return WINED3DERR_INVALIDCALL;
5342     }
5343
5344     return blitter->color_fill(device, s, rect, color);
5345 }
5346
5347 /* Do not call while under the GL lock. */
5348 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5349         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5350         enum wined3d_texture_filter_type filter)
5351 {
5352     struct wined3d_device *device = dst_surface->resource.device;
5353     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5354     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5355
5356     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5357             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5358             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5359
5360     /* Get the swapchain. One of the surfaces has to be a primary surface */
5361     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5362     {
5363         WARN("Destination is in sysmem, rejecting gl blt\n");
5364         return WINED3DERR_INVALIDCALL;
5365     }
5366
5367     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5368         dstSwapchain = dst_surface->container.u.swapchain;
5369
5370     if (src_surface)
5371     {
5372         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5373         {
5374             WARN("Src is in sysmem, rejecting gl blt\n");
5375             return WINED3DERR_INVALIDCALL;
5376         }
5377
5378         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5379             srcSwapchain = src_surface->container.u.swapchain;
5380     }
5381
5382     /* Early sort out of cases where no render target is used */
5383     if (!dstSwapchain && !srcSwapchain
5384             && src_surface != device->fb.render_targets[0]
5385             && dst_surface != device->fb.render_targets[0])
5386     {
5387         TRACE("No surface is render target, not using hardware blit.\n");
5388         return WINED3DERR_INVALIDCALL;
5389     }
5390
5391     /* No destination color keying supported */
5392     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5393     {
5394         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5395         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5396         return WINED3DERR_INVALIDCALL;
5397     }
5398
5399     if (dstSwapchain && dstSwapchain == srcSwapchain)
5400     {
5401         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5402         return WINED3DERR_INVALIDCALL;
5403     }
5404
5405     if (dstSwapchain && srcSwapchain)
5406     {
5407         FIXME("Implement hardware blit between two different swapchains\n");
5408         return WINED3DERR_INVALIDCALL;
5409     }
5410
5411     if (dstSwapchain)
5412     {
5413         /* Handled with regular texture -> swapchain blit */
5414         if (src_surface == device->fb.render_targets[0])
5415             TRACE("Blit from active render target to a swapchain\n");
5416     }
5417     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5418     {
5419         FIXME("Implement blit from a swapchain to the active render target\n");
5420         return WINED3DERR_INVALIDCALL;
5421     }
5422
5423     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5424     {
5425         /* Blit from render target to texture */
5426         BOOL stretchx;
5427
5428         /* P8 read back is not implemented */
5429         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5430                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5431         {
5432             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5433             return WINED3DERR_INVALIDCALL;
5434         }
5435
5436         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5437         {
5438             TRACE("Color keying not supported by frame buffer to texture blit\n");
5439             return WINED3DERR_INVALIDCALL;
5440             /* Destination color key is checked above */
5441         }
5442
5443         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5444             stretchx = TRUE;
5445         else
5446             stretchx = FALSE;
5447
5448         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5449          * flip the image nor scale it.
5450          *
5451          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5452          * -> If the app wants a image width an unscaled width, copy it line per line
5453          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5454          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5455          *    back buffer. This is slower than reading line per line, thus not used for flipping
5456          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5457          *    pixel by pixel. */
5458         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5459                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5460         {
5461             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5462             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5463         }
5464         else
5465         {
5466             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5467             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5468         }
5469
5470         if (!dst_surface->resource.map_count && !(dst_surface->flags & SFLAG_DONOTFREE))
5471         {
5472             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5473             dst_surface->resource.allocatedMemory = NULL;
5474             dst_surface->resource.heapMemory = NULL;
5475         }
5476         else
5477         {
5478             dst_surface->flags &= ~SFLAG_INSYSMEM;
5479         }
5480
5481         return WINED3D_OK;
5482     }
5483     else if (src_surface)
5484     {
5485         /* Blit from offscreen surface to render target */
5486         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5487         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5488
5489         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5490
5491         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5492                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5493                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5494         {
5495             FIXME("Unsupported blit operation falling back to software\n");
5496             return WINED3DERR_INVALIDCALL;
5497         }
5498
5499         /* Color keying: Check if we have to do a color keyed blt,
5500          * and if not check if a color key is activated.
5501          *
5502          * Just modify the color keying parameters in the surface and restore them afterwards
5503          * The surface keeps track of the color key last used to load the opengl surface.
5504          * PreLoad will catch the change to the flags and color key and reload if necessary.
5505          */
5506         if (flags & WINEDDBLT_KEYSRC)
5507         {
5508             /* Use color key from surface */
5509         }
5510         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5511         {
5512             /* Use color key from DDBltFx */
5513             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5514             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5515         }
5516         else
5517         {
5518             /* Do not use color key */
5519             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5520         }
5521
5522         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5523                 src_surface, src_rect, dst_surface, dst_rect);
5524
5525         /* Restore the color key parameters */
5526         src_surface->CKeyFlags = oldCKeyFlags;
5527         src_surface->src_blt_color_key = old_blt_key;
5528
5529         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5530
5531         return WINED3D_OK;
5532     }
5533
5534     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5535     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5536     return WINED3DERR_INVALIDCALL;
5537 }
5538
5539 /* Context activation is done by the caller. */
5540 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5541         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5542 {
5543     struct wined3d_device *device = surface->resource.device;
5544     const struct wined3d_gl_info *gl_info = context->gl_info;
5545     GLint compare_mode = GL_NONE;
5546     struct blt_info info;
5547     GLint old_binding = 0;
5548     RECT rect;
5549
5550     gl_info->gl_ops.gl.p_glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5551
5552     gl_info->gl_ops.gl.p_glDisable(GL_CULL_FACE);
5553     gl_info->gl_ops.gl.p_glDisable(GL_BLEND);
5554     gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5555     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
5556     gl_info->gl_ops.gl.p_glDisable(GL_STENCIL_TEST);
5557     gl_info->gl_ops.gl.p_glEnable(GL_DEPTH_TEST);
5558     gl_info->gl_ops.gl.p_glDepthFunc(GL_ALWAYS);
5559     gl_info->gl_ops.gl.p_glDepthMask(GL_TRUE);
5560     gl_info->gl_ops.gl.p_glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5561     gl_info->gl_ops.gl.p_glViewport(x, y, w, h);
5562     gl_info->gl_ops.gl.p_glDepthRange(0.0, 1.0);
5563
5564     SetRect(&rect, 0, h, w, 0);
5565     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5566     context_active_texture(context, context->gl_info, 0);
5567     gl_info->gl_ops.gl.p_glGetIntegerv(info.binding, &old_binding);
5568     gl_info->gl_ops.gl.p_glBindTexture(info.bind_target, texture);
5569     if (gl_info->supported[ARB_SHADOW])
5570     {
5571         gl_info->gl_ops.gl.p_glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5572         if (compare_mode != GL_NONE)
5573             gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5574     }
5575
5576     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5577             gl_info, info.tex_type, &surface->ds_current_size);
5578
5579     gl_info->gl_ops.gl.p_glBegin(GL_TRIANGLE_STRIP);
5580     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[0]);
5581     gl_info->gl_ops.gl.p_glVertex2f(-1.0f, -1.0f);
5582     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[1]);
5583     gl_info->gl_ops.gl.p_glVertex2f(1.0f, -1.0f);
5584     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[2]);
5585     gl_info->gl_ops.gl.p_glVertex2f(-1.0f, 1.0f);
5586     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[3]);
5587     gl_info->gl_ops.gl.p_glVertex2f(1.0f, 1.0f);
5588     gl_info->gl_ops.gl.p_glEnd();
5589
5590     if (compare_mode != GL_NONE)
5591         gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5592     gl_info->gl_ops.gl.p_glBindTexture(info.bind_target, old_binding);
5593
5594     gl_info->gl_ops.gl.p_glPopAttrib();
5595
5596     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5597 }
5598
5599 void surface_modify_ds_location(struct wined3d_surface *surface,
5600         DWORD location, UINT w, UINT h)
5601 {
5602     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5603
5604     if (location & ~(SFLAG_LOCATIONS | SFLAG_DISCARDED))
5605         FIXME("Invalid location (%#x) specified.\n", location);
5606
5607     if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5608             || (!(surface->flags & SFLAG_INTEXTURE) && (location & SFLAG_INTEXTURE)))
5609     {
5610         if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5611         {
5612             TRACE("Passing to container.\n");
5613             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5614         }
5615     }
5616
5617     surface->ds_current_size.cx = w;
5618     surface->ds_current_size.cy = h;
5619     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_DISCARDED);
5620     surface->flags |= location;
5621 }
5622
5623 /* Make the depth/stencil contents of "surface" current in "location".
      *
      * Does nothing unless offscreen rendering uses FBOs. An actual copy is only
      * implemented for SFLAG_INTEXTURE (onscreen depth buffer -> depth texture)
      * and SFLAG_INDRAWABLE (depth texture -> onscreen depth buffer). For a
      * discarded surface only the storage for the location is prepared.
      *
      * Apart from the three early returns (non-FBO mode, location already up to
      * date, surface has a current renderbuffer), every path sets the location
      * bit in surface->flags and sets ds_current_size to the full resource size.
      *
      * Context activation is done by the caller. */
5624 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5625 {
5626     const struct wined3d_gl_info *gl_info = context->gl_info;
5627     struct wined3d_device *device = surface->resource.device;
5628     GLsizei w, h;
5629
5630     TRACE("surface %p, new location %#x.\n", surface, location);
5631
5632     /* TODO: Make this work for modes other than FBO */
5633     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5634
         /* w x h is the area that gets copied. If "location" is not valid at
          * all, that is the currently valid area, and the valid size is reset to
          * 0 x 0 so the up to date check below only passes for a 0 x 0 resource.
          * Otherwise the full surface size is used. */
5635     if (!(surface->flags & location))
5636     {
5637         w = surface->ds_current_size.cx;
5638         h = surface->ds_current_size.cy;
5639         surface->ds_current_size.cx = 0;
5640         surface->ds_current_size.cy = 0;
5641     }
5642     else
5643     {
5644         w = surface->resource.width;
5645         h = surface->resource.height;
5646     }
5647
5648     if (surface->ds_current_size.cx == surface->resource.width
5649             && surface->ds_current_size.cy == surface->resource.height)
5650     {
5651         TRACE("Location (%#x) is already up to date.\n", location);
5652         return;
5653     }
5654
5655     if (surface->current_renderbuffer)
5656     {
5657         FIXME("Not supported with fixed up depth stencil.\n");
5658         return;
5659     }
5660
         /* Discarded contents do not have to be preserved: make sure the
          * storage for the requested location exists and mark it valid. */
5661     if (surface->flags & SFLAG_DISCARDED)
5662     {
5663         TRACE("Surface was discarded, no need copy data.\n");
5664         switch (location)
5665         {
5666             case SFLAG_INTEXTURE:
5667                 surface_prepare_texture(surface, context, FALSE);
5668                 break;
5669             case SFLAG_INRB_MULTISAMPLE:
5670                 surface_prepare_rb(surface, gl_info, TRUE);
5671                 break;
5672             case SFLAG_INDRAWABLE:
5673                 /* Nothing to do */
5674                 break;
5675             default:
5676                 FIXME("Unhandled location %#x\n", location);
5677         }
5678         surface->flags &= ~SFLAG_DISCARDED;
5679         surface->flags |= location;
5680         surface->ds_current_size.cx = surface->resource.width;
5681         surface->ds_current_size.cy = surface->resource.height;
5682         return;
5683     }
5684
         /* There is no valid location to copy from; the requested location is
          * marked valid anyway. */
5685     if (!(surface->flags & SFLAG_LOCATIONS))
5686     {
5687         FIXME("No up to date depth stencil location.\n");
5688         surface->flags |= location;
5689         surface->ds_current_size.cx = surface->resource.width;
5690         surface->ds_current_size.cy = surface->resource.height;
5691         return;
5692     }
5693
5694     if (location == SFLAG_INTEXTURE)
5695     {
5696         GLint old_binding = 0;
5697         GLenum bind_target;
5698
5699         /* The render target is allowed to be smaller than the depth/stencil
5700          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5701          * than the offscreen surface. Don't overwrite the offscreen surface
5702          * with undefined data. */
5703         w = min(w, context->swapchain->desc.backbuffer_width);
5704         h = min(h, context->swapchain->desc.backbuffer_height);
5705
5706         TRACE("Copying onscreen depth buffer to depth texture.\n");
5707
             /* device->depth_blt_texture is a scratch texture created on first
              * use; it receives the onscreen depth data before it is drawn into
              * the surface's own texture. */
5708         if (!device->depth_blt_texture)
5709             gl_info->gl_ops.gl.p_glGenTextures(1, &device->depth_blt_texture);
5710
5711         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5712          * directly on the FBO texture. That's because we need to flip. */
5713         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5714                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
             /* Remember the texture bound to the target used for the scratch
              * texture so it can be rebound after the copy. */
5715         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5716         {
5717             gl_info->gl_ops.gl.p_glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5718             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5719         }
5720         else
5721         {
5722             gl_info->gl_ops.gl.p_glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5723             bind_target = GL_TEXTURE_2D;
5724         }
5725         gl_info->gl_ops.gl.p_glBindTexture(bind_target, device->depth_blt_texture);
5726         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5727          * internal format, because the internal format might include stencil
5728          * data. In principle we should copy stencil data as well, but unless
5729          * the driver supports stencil export it's hard to do, and doesn't
5730          * seem to be needed in practice. If the hardware doesn't support
5731          * writing stencil data, the glCopyTexImage2D() call might trigger
5732          * software fallbacks. */
5733         gl_info->gl_ops.gl.p_glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5734         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5735         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5736         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5737         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5738         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5739         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5740         gl_info->gl_ops.gl.p_glBindTexture(bind_target, old_binding);
5741
             /* Switch to an FBO with the surface's texture as depth attachment
              * and no colour draw / read buffer. */
5742         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5743                 NULL, surface, SFLAG_INTEXTURE);
5744         context_set_draw_buffer(context, GL_NONE);
5745         gl_info->gl_ops.gl.p_glReadBuffer(GL_NONE);
5746
5747         /* Do the actual blit */
5748         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5749         checkGLcall("depth_blt");
5750
5751         context_invalidate_state(context, STATE_FRAMEBUFFER);
5752
5753         if (wined3d_settings.strict_draw_ordering)
5754             gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5755     }
5756     else if (location == SFLAG_INDRAWABLE)
5757     {
5758         TRACE("Copying depth texture to onscreen depth buffer.\n");
5759
5760         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5761                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
             /* The viewport is placed at y = pow2Height - h rather than 0. */
5762         surface_depth_blt(surface, context, surface->texture_name,
5763                 0, surface->pow2Height - h, w, h, surface->texture_target);
5764         checkGLcall("depth_blt");
5765
5766         context_invalidate_state(context, STATE_FRAMEBUFFER);
5767
5768         if (wined3d_settings.strict_draw_ordering)
5769             gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5770     }
5771     else
5772     {
5773         ERR("Invalid location (%#x) specified.\n", location);
5774     }
5775
         /* Reached by all three branches above, including the error one. */
5776     surface->flags |= location;
5777     surface->ds_current_size.cx = surface->resource.width;
5778     surface->ds_current_size.cy = surface->resource.height;
5779 }
5780
5781 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5782 {
5783     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5784     struct wined3d_surface *overlay;
5785
5786     TRACE("surface %p, location %s, persistent %#x.\n",
5787             surface, debug_surflocation(location), persistent);
5788
5789     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5790             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5791             && (location & SFLAG_INDRAWABLE))
5792         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5793
5794     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5795             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5796         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5797
5798     if (persistent)
5799     {
5800         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5801                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5802         {
5803             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5804             {
5805                 TRACE("Passing to container.\n");
5806                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5807             }
5808         }
5809         surface->flags &= ~SFLAG_LOCATIONS;
5810         surface->flags |= location;
5811
5812         /* Redraw emulated overlays, if any */
5813         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5814         {
5815             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5816             {
5817                 surface_draw_overlay(overlay);
5818             }
5819         }
5820     }
5821     else
5822     {
5823         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5824         {
5825             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5826             {
5827                 TRACE("Passing to container\n");
5828                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5829             }
5830         }
5831         surface->flags &= ~location;
5832     }
5833
5834     if (!(surface->flags & SFLAG_LOCATIONS))
5835     {
5836         ERR("Surface %p does not have any up to date location.\n", surface);
5837     }
5838 }
5839
5840 static DWORD resource_access_from_location(DWORD location)
5841 {
5842     switch (location)
5843     {
5844         case SFLAG_INSYSMEM:
5845             return WINED3D_RESOURCE_ACCESS_CPU;
5846
5847         case SFLAG_INDRAWABLE:
5848         case SFLAG_INSRGBTEX:
5849         case SFLAG_INTEXTURE:
5850         case SFLAG_INRB_MULTISAMPLE:
5851         case SFLAG_INRB_RESOLVED:
5852             return WINED3D_RESOURCE_ACCESS_GPU;
5853
5854         default:
5855             FIXME("Unhandled location %#x.\n", location);
5856             return 0;
5857     }
5858 }
5859
5860 static void surface_load_sysmem(struct wined3d_surface *surface,
5861         const struct wined3d_gl_info *gl_info, const RECT *rect)
5862 {
5863     surface_prepare_system_memory(surface);
5864
5865     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5866         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5867
5868     /* Download the surface to system memory. */
5869     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5870     {
5871         struct wined3d_device *device = surface->resource.device;
5872         struct wined3d_context *context;
5873
5874         /* TODO: Use already acquired context when possible. */
5875         context = context_acquire(device, NULL);
5876
5877         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5878         surface_download_data(surface, gl_info);
5879
5880         context_release(context);
5881
5882         return;
5883     }
5884
5885     if (surface->flags & SFLAG_INDRAWABLE)
5886     {
5887         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5888                 wined3d_surface_get_pitch(surface));
5889         return;
5890     }
5891
5892     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5893             surface, surface->flags & SFLAG_LOCATIONS);
5894 }
5895
5896 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5897         const struct wined3d_gl_info *gl_info, const RECT *rect)
5898 {
5899     struct wined3d_device *device = surface->resource.device;
5900     enum wined3d_conversion_type convert;
5901     struct wined3d_format format;
5902     UINT byte_count;
5903     BYTE *mem;
5904
5905     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5906     {
5907         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5908         return WINED3DERR_INVALIDCALL;
5909     }
5910
5911     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5912         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5913
5914     if (surface->flags & SFLAG_INTEXTURE)
5915     {
5916         RECT r;
5917
5918         surface_get_rect(surface, rect, &r);
5919         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5920
5921         return WINED3D_OK;
5922     }
5923
5924     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5925     {
5926         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5927          * path through sysmem. */
5928         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5929     }
5930
5931     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5932
5933     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5934      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5935      * called. */
5936     if ((convert != WINED3D_CT_NONE) && (surface->flags & SFLAG_PBO))
5937     {
5938         struct wined3d_context *context;
5939
5940         TRACE("Removing the pbo attached to surface %p.\n", surface);
5941
5942         /* TODO: Use already acquired context when possible. */
5943         context = context_acquire(device, NULL);
5944
5945         surface_remove_pbo(surface, gl_info);
5946
5947         context_release(context);
5948     }
5949
5950     if ((convert != WINED3D_CT_NONE) && surface->resource.allocatedMemory)
5951     {
5952         UINT height = surface->resource.height;
5953         UINT width = surface->resource.width;
5954         UINT src_pitch, dst_pitch;
5955
5956         byte_count = format.conv_byte_count;
5957         src_pitch = wined3d_surface_get_pitch(surface);
5958
5959         /* Stick to the alignment for the converted surface too, makes it
5960          * easier to load the surface. */
5961         dst_pitch = width * byte_count;
5962         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5963
5964         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5965         {
5966             ERR("Out of memory (%u).\n", dst_pitch * height);
5967             return E_OUTOFMEMORY;
5968         }
5969
5970         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5971                 src_pitch, width, height, dst_pitch, convert, surface);
5972
5973         surface->flags |= SFLAG_CONVERTED;
5974     }
5975     else
5976     {
5977         surface->flags &= ~SFLAG_CONVERTED;
5978         mem = surface->resource.allocatedMemory;
5979         byte_count = format.byte_count;
5980     }
5981
5982     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5983
5984     /* Don't delete PBO memory. */
5985     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5986         HeapFree(GetProcessHeap(), 0, mem);
5987
5988     return WINED3D_OK;
5989 }
5990
/* Bring the surface contents into its GL texture; "srgb" selects the sRGB
 * texture (SFLAG_INSRGBTEX) instead of the RGB one (SFLAG_INTEXTURE).
 *
 * The paths are tried in this order:
 *  1. without FBOs, an offscreen surface that is current in the drawable is
 *     handled by surface_load_fb_texture();
 *  2. if one of the two textures is valid and an FBO blit is possible, blit
 *     between the RGB and sRGB textures;
 *  3. if a renderbuffer location is valid and an FBO blit is possible, blit
 *     from the renderbuffer;
 *  4. otherwise upload from system memory, converting the data first if
 *     d3dfmt_get_conv() requests it.
 *
 * Returns WINED3D_OK, or E_OUTOFMEMORY if a conversion buffer cannot be
 * allocated. */
5991 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5992         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5993 {
5994     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5995     struct wined3d_device *device = surface->resource.device;
5996     enum wined3d_conversion_type convert;
5997     struct wined3d_context *context;
5998     UINT width, src_pitch, dst_pitch;
5999     struct wined3d_bo_address data;
6000     struct wined3d_format format;
6001     POINT dst_point = {0, 0};
6002     BYTE *mem;
6003
6004     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6005             && surface_is_offscreen(surface)
6006             && (surface->flags & SFLAG_INDRAWABLE))
6007     {
6008         surface_load_fb_texture(surface, srgb);
6009
6010         return WINED3D_OK;
6011     }
6012
         /* One of the two textures is valid: copy from the texture of the other
          * colourspace into the requested one with an FBO blit, if the format
          * and the blitter allow it. */
6013     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6014             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6015             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6016                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6017                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6018     {
6019         if (srgb)
6020             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6021                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6022         else
6023             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6024                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6025
6026         return WINED3D_OK;
6027     }
6028
         /* A renderbuffer is valid: blit from it, preferring the resolved one. */
6029     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6030             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6031             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6032                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6033                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6034     {
6035         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6036         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
             /* NOTE(review): this local shadows the "rect" parameter and is
              * initialized to the same values as src_rect. */
6037         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6038
6039         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6040                 &rect, surface, dst_location, &rect);
6041
6042         return WINED3D_OK;
6043     }
6044
6045     /* Upload from system memory */
6046
6047     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6048             TRUE /* We will use textures */, &format, &convert);
6049
         /* If the data is valid in the texture of the other colourspace but not
          * in system memory, download it first. */
6050     if (srgb)
6051     {
6052         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6053         {
6054             /* Performance warning... */
6055             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6056             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6057         }
6058     }
6059     else
6060     {
6061         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6062         {
6063             /* Performance warning... */
6064             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6065             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6066         }
6067     }
6068
6069     if (!(surface->flags & SFLAG_INSYSMEM))
6070     {
6071         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6072         /* Lets hope we get it from somewhere... */
6073         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6074     }
6075
6076     /* TODO: Use already acquired context when possible. */
6077     context = context_acquire(device, NULL);
6078
6079     surface_prepare_texture(surface, context, srgb);
6080     surface_bind_and_dirtify(surface, context, srgb);
6081
         /* Record whether a source blit colour key is set, and which one, for
          * the data that is about to be uploaded. */
6082     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6083     {
6084         surface->flags |= SFLAG_GLCKEY;
6085         surface->gl_color_key = surface->src_blt_color_key;
6086     }
6087     else surface->flags &= ~SFLAG_GLCKEY;
6088
6089     width = surface->resource.width;
6090     src_pitch = wined3d_surface_get_pitch(surface);
6091
6092     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6093      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6094      * called. */
6095     if ((convert != WINED3D_CT_NONE || format.convert) && (surface->flags & SFLAG_PBO))
6096     {
6097         TRACE("Removing the pbo attached to surface %p.\n", surface);
6098         surface_remove_pbo(surface, gl_info);
6099     }
6100
         /* Both conversion branches below allocate a temporary buffer "mem",
          * convert into it and upload from it with the converted pitch and
          * byte count. Without conversion the surface memory is uploaded
          * directly. */
6101     if (format.convert)
6102     {
6103         /* This code is entered for texture formats which need a fixup. */
6104         UINT height = surface->resource.height;
6105
6106         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6107         dst_pitch = width * format.conv_byte_count;
6108         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6109
6110         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6111         {
6112             ERR("Out of memory (%u).\n", dst_pitch * height);
6113             context_release(context);
6114             return E_OUTOFMEMORY;
6115         }
6116         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6117         format.byte_count = format.conv_byte_count;
6118         src_pitch = dst_pitch;
6119     }
6120     else if (convert != WINED3D_CT_NONE && surface->resource.allocatedMemory)
6121     {
6122         /* This code is only entered for color keying fixups */
6123         UINT height = surface->resource.height;
6124
6125         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6126         dst_pitch = width * format.conv_byte_count;
6127         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6128
6129         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6130         {
6131             ERR("Out of memory (%u).\n", dst_pitch * height);
6132             context_release(context);
6133             return E_OUTOFMEMORY;
6134         }
6135         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6136                 width, height, dst_pitch, convert, surface);
6137         format.byte_count = format.conv_byte_count;
6138         src_pitch = dst_pitch;
6139     }
6140     else
6141     {
6142         mem = surface->resource.allocatedMemory;
6143     }
6144
6145     data.buffer_object = surface->pbo;
6146     data.addr = mem;
6147     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6148
6149     context_release(context);
6150
6151     /* Don't delete PBO memory. */
6152     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6153         HeapFree(GetProcessHeap(), 0, mem);
6154
6155     return WINED3D_OK;
6156 }
6157
6158 static void surface_multisample_resolve(struct wined3d_surface *surface)
6159 {
6160     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6161
6162     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6163         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6164
6165     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6166             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6167 }
6168
6169 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6170 {
6171     struct wined3d_device *device = surface->resource.device;
6172     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6173     HRESULT hr;
6174
6175     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6176
6177     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6178     {
6179         if (location == SFLAG_INTEXTURE && surface->flags & SFLAG_INDRAWABLE)
6180         {
6181             struct wined3d_context *context = context_acquire(device, NULL);
6182             surface_load_ds_location(surface, context, location);
6183             context_release(context);
6184             return WINED3D_OK;
6185         }
6186         else if (location & surface->flags && surface->draw_binding != SFLAG_INDRAWABLE)
6187         {
6188             /* Already up to date, nothing to do. */
6189             return WINED3D_OK;
6190         }
6191         else
6192         {
6193             FIXME("Unimplemented copy from %s to %s for depth/stencil buffers.\n",
6194                     debug_surflocation(surface->flags & SFLAG_LOCATIONS), debug_surflocation(location));
6195             return WINED3DERR_INVALIDCALL;
6196         }
6197     }
6198
6199     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6200         location = SFLAG_INTEXTURE;
6201
6202     if (surface->flags & location)
6203     {
6204         TRACE("Location already up to date.\n");
6205
6206         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6207                 && surface_need_pbo(surface, gl_info))
6208             surface_load_pbo(surface, gl_info);
6209
6210         return WINED3D_OK;
6211     }
6212
6213     if (WARN_ON(d3d_surface))
6214     {
6215         DWORD required_access = resource_access_from_location(location);
6216         if ((surface->resource.access_flags & required_access) != required_access)
6217             WARN("Operation requires %#x access, but surface only has %#x.\n",
6218                     required_access, surface->resource.access_flags);
6219     }
6220
6221     if (!(surface->flags & SFLAG_LOCATIONS))
6222     {
6223         ERR("Surface %p does not have any up to date location.\n", surface);
6224         surface->flags |= SFLAG_LOST;
6225         return WINED3DERR_DEVICELOST;
6226     }
6227
6228     switch (location)
6229     {
6230         case SFLAG_INSYSMEM:
6231             surface_load_sysmem(surface, gl_info, rect);
6232             break;
6233
6234         case SFLAG_INDRAWABLE:
6235             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6236                 return hr;
6237             break;
6238
6239         case SFLAG_INRB_RESOLVED:
6240             surface_multisample_resolve(surface);
6241             break;
6242
6243         case SFLAG_INTEXTURE:
6244         case SFLAG_INSRGBTEX:
6245             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6246                 return hr;
6247             break;
6248
6249         default:
6250             ERR("Don't know how to handle location %#x.\n", location);
6251             break;
6252     }
6253
6254     if (!rect)
6255     {
6256         surface->flags |= location;
6257
6258         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6259             surface_evict_sysmem(surface);
6260     }
6261
6262     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6263             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6264     {
6265         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6266     }
6267
6268     return WINED3D_OK;
6269 }
6270
6271 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6272 {
6273     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6274
6275     /* Not on a swapchain - must be offscreen */
6276     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6277
6278     /* The front buffer is always onscreen */
6279     if (surface == swapchain->front_buffer) return FALSE;
6280
6281     /* If the swapchain is rendered to an FBO, the backbuffer is
6282      * offscreen, otherwise onscreen */
6283     return swapchain->render_to_fbo;
6284 }
6285
6286 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
    /* Nothing was allocated by ffp_blit_alloc(), so nothing to release. */
}
6289
6290 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6291 /* Context activation is done by the caller. */
6292 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6293 {
6294     BYTE table[256][4];
6295     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) != 0;
6296     GLenum target;
6297
6298     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
6299         target = surface->container.u.texture->target;
6300     else
6301         target = surface->texture_target;
6302
6303     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6304
6305     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6306     GL_EXTCALL(glColorTableEXT(target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6307 }
6308
6309 /* Context activation is done by the caller. */
6310 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6311 {
6312     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6313     const struct wined3d_gl_info *gl_info = context->gl_info;
6314     GLenum target;
6315
6316     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
6317         target = surface->container.u.texture->target;
6318     else
6319         target = surface->texture_target;
6320
6321     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6322      * else the surface is converted in software at upload time in LoadLocation.
6323      */
6324     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6325             && gl_info->supported[EXT_PALETTED_TEXTURE])
6326         ffp_blit_p8_upload_palette(surface, gl_info);
6327
6328     gl_info->gl_ops.gl.p_glEnable(target);
6329     checkGLcall("glEnable(target)");
6330
6331     return WINED3D_OK;
6332 }
6333
6334 /* Context activation is done by the caller. */
6335 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6336 {
6337     gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_2D);
6338     checkGLcall("glDisable(GL_TEXTURE_2D)");
6339     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6340     {
6341         gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6342         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6343     }
6344     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6345     {
6346         gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_RECTANGLE_ARB);
6347         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6348     }
6349 }
6350
6351 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6352         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6353         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6354 {
6355     enum complex_fixup src_fixup;
6356
6357     switch (blit_op)
6358     {
6359         case WINED3D_BLIT_OP_COLOR_BLIT:
6360             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6361                 return FALSE;
6362
6363             src_fixup = get_complex_fixup(src_format->color_fixup);
6364             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6365             {
6366                 TRACE("Checking support for fixup:\n");
6367                 dump_color_fixup_desc(src_format->color_fixup);
6368             }
6369
6370             if (!is_identity_fixup(dst_format->color_fixup))
6371             {
6372                 TRACE("Destination fixups are not supported\n");
6373                 return FALSE;
6374             }
6375
6376             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6377             {
6378                 TRACE("P8 fixup supported\n");
6379                 return TRUE;
6380             }
6381
6382             /* We only support identity conversions. */
6383             if (is_identity_fixup(src_format->color_fixup))
6384             {
6385                 TRACE("[OK]\n");
6386                 return TRUE;
6387             }
6388
6389             TRACE("[FAILED]\n");
6390             return FALSE;
6391
6392         case WINED3D_BLIT_OP_COLOR_FILL:
6393             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6394                 return FALSE;
6395
6396             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6397             {
6398                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6399                     return FALSE;
6400             }
6401             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6402             {
6403                 TRACE("Color fill not supported\n");
6404                 return FALSE;
6405             }
6406
6407             /* FIXME: We should reject color fills on formats with fixups,
6408              * but this would break P8 color fills for example. */
6409
6410             return TRUE;
6411
6412         case WINED3D_BLIT_OP_DEPTH_FILL:
6413             return TRUE;
6414
6415         default:
6416             TRACE("Unsupported blit_op=%d\n", blit_op);
6417             return FALSE;
6418     }
6419 }
6420
6421 /* Do not call while under the GL lock. */
6422 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6423         const RECT *dst_rect, const struct wined3d_color *color)
6424 {
6425     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6426     struct wined3d_fb_state fb = {&dst_surface, NULL};
6427
6428     device_clear_render_targets(device, 1, &fb, 1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6429
6430     return WINED3D_OK;
6431 }
6432
6433 /* Do not call while under the GL lock. */
6434 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6435         struct wined3d_surface *surface, const RECT *rect, float depth)
6436 {
6437     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6438     struct wined3d_fb_state fb = {NULL, surface};
6439
6440     device_clear_render_targets(device, 0, &fb, 1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6441
6442     return WINED3D_OK;
6443 }
6444
6445 const struct blit_shader ffp_blit =  {
6446     ffp_blit_alloc,
6447     ffp_blit_free,
6448     ffp_blit_set,
6449     ffp_blit_unset,
6450     ffp_blit_supported,
6451     ffp_blit_color_fill,
6452     ffp_blit_depth_fill,
6453 };
6454
6455 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6456 {
6457     return WINED3D_OK;
6458 }
6459
/* Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* cpu_blit_alloc() allocates nothing, so there is nothing to free. */
}
6464
6465 /* Context activation is done by the caller. */
6466 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6467 {
6468     return WINED3D_OK;
6469 }
6470
/* Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* cpu_blit_set() changes no state, so there is nothing to undo. */
}
6475
6476 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6477         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6478         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6479 {
6480     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6481     {
6482         return TRUE;
6483     }
6484
6485     return FALSE;
6486 }
6487
6488 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6489         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6490         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6491 {
6492     UINT row_block_count;
6493     const BYTE *src_row;
6494     BYTE *dst_row;
6495     UINT x, y;
6496
6497     src_row = src_data;
6498     dst_row = dst_data;
6499
6500     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6501
6502     if (!flags)
6503     {
6504         for (y = 0; y < update_h; y += format->block_height)
6505         {
6506             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6507             src_row += src_pitch;
6508             dst_row += dst_pitch;
6509         }
6510
6511         return WINED3D_OK;
6512     }
6513
6514     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6515     {
6516         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6517
6518         switch (format->id)
6519         {
6520             case WINED3DFMT_DXT1:
6521                 for (y = 0; y < update_h; y += format->block_height)
6522                 {
6523                     struct block
6524                     {
6525                         WORD color[2];
6526                         BYTE control_row[4];
6527                     };
6528
6529                     const struct block *s = (const struct block *)src_row;
6530                     struct block *d = (struct block *)dst_row;
6531
6532                     for (x = 0; x < row_block_count; ++x)
6533                     {
6534                         d[x].color[0] = s[x].color[0];
6535                         d[x].color[1] = s[x].color[1];
6536                         d[x].control_row[0] = s[x].control_row[3];
6537                         d[x].control_row[1] = s[x].control_row[2];
6538                         d[x].control_row[2] = s[x].control_row[1];
6539                         d[x].control_row[3] = s[x].control_row[0];
6540                     }
6541                     src_row -= src_pitch;
6542                     dst_row += dst_pitch;
6543                 }
6544                 return WINED3D_OK;
6545
6546             case WINED3DFMT_DXT3:
6547                 for (y = 0; y < update_h; y += format->block_height)
6548                 {
6549                     struct block
6550                     {
6551                         WORD alpha_row[4];
6552                         WORD color[2];
6553                         BYTE control_row[4];
6554                     };
6555
6556                     const struct block *s = (const struct block *)src_row;
6557                     struct block *d = (struct block *)dst_row;
6558
6559                     for (x = 0; x < row_block_count; ++x)
6560                     {
6561                         d[x].alpha_row[0] = s[x].alpha_row[3];
6562                         d[x].alpha_row[1] = s[x].alpha_row[2];
6563                         d[x].alpha_row[2] = s[x].alpha_row[1];
6564                         d[x].alpha_row[3] = s[x].alpha_row[0];
6565                         d[x].color[0] = s[x].color[0];
6566                         d[x].color[1] = s[x].color[1];
6567                         d[x].control_row[0] = s[x].control_row[3];
6568                         d[x].control_row[1] = s[x].control_row[2];
6569                         d[x].control_row[2] = s[x].control_row[1];
6570                         d[x].control_row[3] = s[x].control_row[0];
6571                     }
6572                     src_row -= src_pitch;
6573                     dst_row += dst_pitch;
6574                 }
6575                 return WINED3D_OK;
6576
6577             default:
6578                 FIXME("Compressed flip not implemented for format %s.\n",
6579                         debug_d3dformat(format->id));
6580                 return E_NOTIMPL;
6581         }
6582     }
6583
6584     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6585             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6586
6587     return E_NOTIMPL;
6588 }
6589
6590 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6591         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6592         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6593 {
6594     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6595     const struct wined3d_format *src_format, *dst_format;
6596     struct wined3d_surface *orig_src = src_surface;
6597     struct wined3d_map_desc dst_map, src_map;
6598     const BYTE *sbase = NULL;
6599     HRESULT hr = WINED3D_OK;
6600     const BYTE *sbuf;
6601     BYTE *dbuf;
6602     int x, y;
6603
6604     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6605             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6606             flags, fx, debug_d3dtexturefiltertype(filter));
6607
6608     if (src_surface == dst_surface)
6609     {
6610         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6611         src_map = dst_map;
6612         src_format = dst_surface->resource.format;
6613         dst_format = src_format;
6614     }
6615     else
6616     {
6617         dst_format = dst_surface->resource.format;
6618         if (src_surface)
6619         {
6620             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6621             {
6622                 src_surface = surface_convert_format(src_surface, dst_format->id);
6623                 if (!src_surface)
6624                 {
6625                     /* The conv function writes a FIXME */
6626                     WARN("Cannot convert source surface format to dest format.\n");
6627                     goto release;
6628                 }
6629             }
6630             wined3d_surface_map(src_surface, &src_map, NULL, WINED3D_MAP_READONLY);
6631             src_format = src_surface->resource.format;
6632         }
6633         else
6634         {
6635             src_format = dst_format;
6636         }
6637
6638         wined3d_surface_map(dst_surface, &dst_map, dst_rect, 0);
6639     }
6640
6641     bpp = dst_surface->resource.format->byte_count;
6642     srcheight = src_rect->bottom - src_rect->top;
6643     srcwidth = src_rect->right - src_rect->left;
6644     dstheight = dst_rect->bottom - dst_rect->top;
6645     dstwidth = dst_rect->right - dst_rect->left;
6646     width = (dst_rect->right - dst_rect->left) * bpp;
6647
6648     if (src_surface)
6649         sbase = (BYTE *)src_map.data
6650                 + ((src_rect->top / src_format->block_height) * src_map.row_pitch)
6651                 + ((src_rect->left / src_format->block_width) * src_format->block_byte_count);
6652     if (src_surface != dst_surface)
6653         dbuf = dst_map.data;
6654     else
6655         dbuf = (BYTE *)dst_map.data
6656                 + ((dst_rect->top / dst_format->block_height) * dst_map.row_pitch)
6657                 + ((dst_rect->left / dst_format->block_width) * dst_format->block_byte_count);
6658
6659     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6660     {
6661         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6662
6663         if (src_surface == dst_surface)
6664         {
6665             FIXME("Only plain blits supported on compressed surfaces.\n");
6666             hr = E_NOTIMPL;
6667             goto release;
6668         }
6669
6670         if (srcheight != dstheight || srcwidth != dstwidth)
6671         {
6672             WARN("Stretching not supported on compressed surfaces.\n");
6673             hr = WINED3DERR_INVALIDCALL;
6674             goto release;
6675         }
6676
6677         if (!surface_check_block_align(src_surface, src_rect))
6678         {
6679             WARN("Source rectangle not block-aligned.\n");
6680             hr = WINED3DERR_INVALIDCALL;
6681             goto release;
6682         }
6683
6684         if (!surface_check_block_align(dst_surface, dst_rect))
6685         {
6686             WARN("Destination rectangle not block-aligned.\n");
6687             hr = WINED3DERR_INVALIDCALL;
6688             goto release;
6689         }
6690
6691         hr = surface_cpu_blt_compressed(sbase, dbuf,
6692                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6693                 src_format, flags, fx);
6694         goto release;
6695     }
6696
6697     /* First, all the 'source-less' blits */
6698     if (flags & WINEDDBLT_COLORFILL)
6699     {
6700         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6701         flags &= ~WINEDDBLT_COLORFILL;
6702     }
6703
6704     if (flags & WINEDDBLT_DEPTHFILL)
6705     {
6706         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6707     }
6708     if (flags & WINEDDBLT_ROP)
6709     {
6710         /* Catch some degenerate cases here. */
6711         switch (fx->dwROP)
6712         {
6713             case BLACKNESS:
6714                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6715                 break;
6716             case 0xaa0029: /* No-op */
6717                 break;
6718             case WHITENESS:
6719                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6720                 break;
6721             case SRCCOPY: /* Well, we do that below? */
6722                 break;
6723             default:
6724                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6725                 goto error;
6726         }
6727         flags &= ~WINEDDBLT_ROP;
6728     }
6729     if (flags & WINEDDBLT_DDROPS)
6730     {
6731         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6732     }
6733     /* Now the 'with source' blits. */
6734     if (src_surface)
6735     {
6736         int sx, xinc, sy, yinc;
6737
6738         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6739             goto release;
6740
6741         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6742                 && (srcwidth != dstwidth || srcheight != dstheight))
6743         {
6744             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6745             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6746         }
6747
6748         xinc = (srcwidth << 16) / dstwidth;
6749         yinc = (srcheight << 16) / dstheight;
6750
6751         if (!flags)
6752         {
6753             /* No effects, we can cheat here. */
6754             if (dstwidth == srcwidth)
6755             {
6756                 if (dstheight == srcheight)
6757                 {
6758                     /* No stretching in either direction. This needs to be as
6759                      * fast as possible. */
6760                     sbuf = sbase;
6761
6762                     /* Check for overlapping surfaces. */
6763                     if (src_surface != dst_surface || dst_rect->top < src_rect->top
6764                             || dst_rect->right <= src_rect->left || src_rect->right <= dst_rect->left)
6765                     {
6766                         /* No overlap, or dst above src, so copy from top downwards. */
6767                         for (y = 0; y < dstheight; ++y)
6768                         {
6769                             memcpy(dbuf, sbuf, width);
6770                             sbuf += src_map.row_pitch;
6771                             dbuf += dst_map.row_pitch;
6772                         }
6773                     }
6774                     else if (dst_rect->top > src_rect->top)
6775                     {
6776                         /* Copy from bottom upwards. */
6777                         sbuf += src_map.row_pitch * dstheight;
6778                         dbuf += dst_map.row_pitch * dstheight;
6779                         for (y = 0; y < dstheight; ++y)
6780                         {
6781                             sbuf -= src_map.row_pitch;
6782                             dbuf -= dst_map.row_pitch;
6783                             memcpy(dbuf, sbuf, width);
6784                         }
6785                     }
6786                     else
6787                     {
6788                         /* Src and dst overlapping on the same line, use memmove. */
6789                         for (y = 0; y < dstheight; ++y)
6790                         {
6791                             memmove(dbuf, sbuf, width);
6792                             sbuf += src_map.row_pitch;
6793                             dbuf += dst_map.row_pitch;
6794                         }
6795                     }
6796                 }
6797                 else
6798                 {
6799                     /* Stretching in y direction only. */
6800                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6801                     {
6802                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6803                         memcpy(dbuf, sbuf, width);
6804                         dbuf += dst_map.row_pitch;
6805                     }
6806                 }
6807             }
6808             else
6809             {
6810                 /* Stretching in X direction. */
6811                 int last_sy = -1;
6812                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6813                 {
6814                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6815
6816                     if ((sy >> 16) == (last_sy >> 16))
6817                     {
6818                         /* This source row is the same as last source row -
6819                          * Copy the already stretched row. */
6820                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6821                     }
6822                     else
6823                     {
6824 #define STRETCH_ROW(type) \
6825 do { \
6826     const type *s = (const type *)sbuf; \
6827     type *d = (type *)dbuf; \
6828     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6829         d[x] = s[sx >> 16]; \
6830 } while(0)
6831
6832                         switch(bpp)
6833                         {
6834                             case 1:
6835                                 STRETCH_ROW(BYTE);
6836                                 break;
6837                             case 2:
6838                                 STRETCH_ROW(WORD);
6839                                 break;
6840                             case 4:
6841                                 STRETCH_ROW(DWORD);
6842                                 break;
6843                             case 3:
6844                             {
6845                                 const BYTE *s;
6846                                 BYTE *d = dbuf;
6847                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6848                                 {
6849                                     DWORD pixel;
6850
6851                                     s = sbuf + 3 * (sx >> 16);
6852                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6853                                     d[0] = (pixel      ) & 0xff;
6854                                     d[1] = (pixel >>  8) & 0xff;
6855                                     d[2] = (pixel >> 16) & 0xff;
6856                                     d += 3;
6857                                 }
6858                                 break;
6859                             }
6860                             default:
6861                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6862                                 hr = WINED3DERR_NOTAVAILABLE;
6863                                 goto error;
6864                         }
6865 #undef STRETCH_ROW
6866                     }
6867                     dbuf += dst_map.row_pitch;
6868                     last_sy = sy;
6869                 }
6870             }
6871         }
6872         else
6873         {
6874             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6875             DWORD keylow = 0xffffffff, keyhigh = 0, keymask = 0xffffffff;
6876             DWORD destkeylow = 0x0, destkeyhigh = 0xffffffff, destkeymask = 0xffffffff;
6877             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6878             {
6879                 /* The color keying flags are checked for correctness in ddraw */
6880                 if (flags & WINEDDBLT_KEYSRC)
6881                 {
6882                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6883                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6884                 }
6885                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6886                 {
6887                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6888                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6889                 }
6890
6891                 if (flags & WINEDDBLT_KEYDEST)
6892                 {
6893                     /* Destination color keys are taken from the source surface! */
6894                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6895                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6896                 }
6897                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6898                 {
6899                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6900                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6901                 }
6902
6903                 if (bpp == 1)
6904                 {
6905                     keymask = 0xff;
6906                 }
6907                 else
6908                 {
6909                     DWORD masks[3];
6910                     get_color_masks(src_format, masks);
6911                     keymask = masks[0]
6912                             | masks[1]
6913                             | masks[2];
6914                 }
6915                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6916             }
6917
6918             if (flags & WINEDDBLT_DDFX)
6919             {
6920                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6921                 LONG tmpxy;
6922                 dTopLeft     = dbuf;
6923                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6924                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6925                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6926
6927                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6928                 {
6929                     /* I don't think we need to do anything about this flag */
6930                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6931                 }
6932                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6933                 {
6934                     tmp          = dTopRight;
6935                     dTopRight    = dTopLeft;
6936                     dTopLeft     = tmp;
6937                     tmp          = dBottomRight;
6938                     dBottomRight = dBottomLeft;
6939                     dBottomLeft  = tmp;
6940                     dstxinc = dstxinc * -1;
6941                 }
6942                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6943                 {
6944                     tmp          = dTopLeft;
6945                     dTopLeft     = dBottomLeft;
6946                     dBottomLeft  = tmp;
6947                     tmp          = dTopRight;
6948                     dTopRight    = dBottomRight;
6949                     dBottomRight = tmp;
6950                     dstyinc = dstyinc * -1;
6951                 }
6952                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6953                 {
6954                     /* I don't think we need to do anything about this flag */
6955                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6956                 }
6957                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6958                 {
6959                     tmp          = dBottomRight;
6960                     dBottomRight = dTopLeft;
6961                     dTopLeft     = tmp;
6962                     tmp          = dBottomLeft;
6963                     dBottomLeft  = dTopRight;
6964                     dTopRight    = tmp;
6965                     dstxinc = dstxinc * -1;
6966                     dstyinc = dstyinc * -1;
6967                 }
6968                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6969                 {
6970                     tmp          = dTopLeft;
6971                     dTopLeft     = dBottomLeft;
6972                     dBottomLeft  = dBottomRight;
6973                     dBottomRight = dTopRight;
6974                     dTopRight    = tmp;
6975                     tmpxy   = dstxinc;
6976                     dstxinc = dstyinc;
6977                     dstyinc = tmpxy;
6978                     dstxinc = dstxinc * -1;
6979                 }
6980                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6981                 {
6982                     tmp          = dTopLeft;
6983                     dTopLeft     = dTopRight;
6984                     dTopRight    = dBottomRight;
6985                     dBottomRight = dBottomLeft;
6986                     dBottomLeft  = tmp;
6987                     tmpxy   = dstxinc;
6988                     dstxinc = dstyinc;
6989                     dstyinc = tmpxy;
6990                     dstyinc = dstyinc * -1;
6991                 }
6992                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6993                 {
6994                     /* I don't think we need to do anything about this flag */
6995                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6996                 }
6997                 dbuf = dTopLeft;
6998                 flags &= ~(WINEDDBLT_DDFX);
6999             }
7000
7001 #define COPY_COLORKEY_FX(type) \
7002 do { \
7003     const type *s; \
7004     type *d = (type *)dbuf, *dx, tmp; \
7005     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7006     { \
7007         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7008         dx = d; \
7009         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7010         { \
7011             tmp = s[sx >> 16]; \
7012             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7013                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7014             { \
7015                 dx[0] = tmp; \
7016             } \
7017             dx = (type *)(((BYTE *)dx) + dstxinc); \
7018         } \
7019         d = (type *)(((BYTE *)d) + dstyinc); \
7020     } \
7021 } while(0)
7022
7023             switch (bpp)
7024             {
7025                 case 1:
7026                     COPY_COLORKEY_FX(BYTE);
7027                     break;
7028                 case 2:
7029                     COPY_COLORKEY_FX(WORD);
7030                     break;
7031                 case 4:
7032                     COPY_COLORKEY_FX(DWORD);
7033                     break;
7034                 case 3:
7035                 {
7036                     const BYTE *s;
7037                     BYTE *d = dbuf, *dx;
7038                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7039                     {
7040                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7041                         dx = d;
7042                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7043                         {
7044                             DWORD pixel, dpixel = 0;
7045                             s = sbuf + 3 * (sx>>16);
7046                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7047                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7048                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7049                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7050                             {
7051                                 dx[0] = (pixel      ) & 0xff;
7052                                 dx[1] = (pixel >>  8) & 0xff;
7053                                 dx[2] = (pixel >> 16) & 0xff;
7054                             }
7055                             dx += dstxinc;
7056                         }
7057                         d += dstyinc;
7058                     }
7059                     break;
7060                 }
7061                 default:
7062                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7063                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7064                     hr = WINED3DERR_NOTAVAILABLE;
7065                     goto error;
7066 #undef COPY_COLORKEY_FX
7067             }
7068         }
7069     }
7070
7071 error:
7072     if (flags && FIXME_ON(d3d_surface))
7073     {
7074         FIXME("\tUnsupported flags: %#x.\n", flags);
7075     }
7076
7077 release:
7078     wined3d_surface_unmap(dst_surface);
7079     if (src_surface && src_surface != dst_surface)
7080         wined3d_surface_unmap(src_surface);
7081     /* Release the converted surface, if any. */
7082     if (src_surface && src_surface != orig_src)
7083         wined3d_surface_decref(src_surface);
7084
7085     return hr;
7086 }
7087
7088 /* Do not call while under the GL lock. */
7089 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7090         const RECT *dst_rect, const struct wined3d_color *color)
7091 {
7092     static const RECT src_rect;
7093     WINEDDBLTFX BltFx;
7094
7095     memset(&BltFx, 0, sizeof(BltFx));
7096     BltFx.dwSize = sizeof(BltFx);
7097     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7098     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7099             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7100 }
7101
7102 /* Do not call while under the GL lock. */
7103 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7104         struct wined3d_surface *surface, const RECT *rect, float depth)
7105 {
7106     FIXME("Depth filling not implemented by cpu_blit.\n");
7107     return WINED3DERR_INVALIDCALL;
7108 }
7109
/* blit_shader implementation backed by the cpu_blit_* functions.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct blit_shader (declared outside this part of the file). */
const struct blit_shader cpu_blit =  {
    cpu_blit_alloc,
    cpu_blit_free,
    cpu_blit_set,
    cpu_blit_unset,
    cpu_blit_supported,
    cpu_blit_color_fill,
    cpu_blit_depth_fill,
};
7119
7120 static HRESULT surface_init(struct wined3d_surface *surface, UINT alignment, UINT width, UINT height,
7121         enum wined3d_multisample_type multisample_type, UINT multisample_quality,
7122         struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7123         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7124 {
7125     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7126     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7127     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7128     unsigned int resource_size;
7129     HRESULT hr;
7130
7131     if (multisample_quality > 0)
7132     {
7133         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7134         multisample_quality = 0;
7135     }
7136
7137     /* Quick lockable sanity check.
7138      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7139      * this function is too deep to need to care about things like this.
7140      * Levels need to be checked too, since they all affect what can be done. */
7141     switch (pool)
7142     {
7143         case WINED3D_POOL_SCRATCH:
7144             if (!lockable)
7145             {
7146                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7147                         "which are mutually exclusive, setting lockable to TRUE.\n");
7148                 lockable = TRUE;
7149             }
7150             break;
7151
7152         case WINED3D_POOL_SYSTEM_MEM:
7153             if (!lockable)
7154                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7155             break;
7156
7157         case WINED3D_POOL_MANAGED:
7158             if (usage & WINED3DUSAGE_DYNAMIC)
7159                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7160             break;
7161
7162         case WINED3D_POOL_DEFAULT:
7163             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7164                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7165             break;
7166
7167         default:
7168             FIXME("Unknown pool %#x.\n", pool);
7169             break;
7170     };
7171
7172     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7173         FIXME("Trying to create a render target that isn't in the default pool.\n");
7174
7175     /* FIXME: Check that the format is supported by the device. */
7176
7177     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7178     if (!resource_size)
7179         return WINED3DERR_INVALIDCALL;
7180
7181     if (device->wined3d->flags & WINED3D_NO3D)
7182         surface->surface_ops = &gdi_surface_ops;
7183     else
7184         surface->surface_ops = &surface_ops;
7185
7186     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7187             multisample_type, multisample_quality, usage, pool, width, height, 1,
7188             resource_size, parent, parent_ops, &surface_resource_ops);
7189     if (FAILED(hr))
7190     {
7191         WARN("Failed to initialize resource, returning %#x.\n", hr);
7192         return hr;
7193     }
7194
7195     /* "Standalone" surface. */
7196     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7197
7198     list_init(&surface->overlays);
7199
7200     /* Flags */
7201     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7202     if (flags & WINED3D_SURFACE_DISCARD)
7203         surface->flags |= SFLAG_DISCARD;
7204     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7205         surface->flags |= SFLAG_PIN_SYSMEM;
7206     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7207         surface->flags |= SFLAG_LOCKABLE;
7208     /* I'm not sure if this qualifies as a hack or as an optimization. It
7209      * seems reasonable to assume that lockable render targets will get
7210      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7211      * creation. However, the other reason we want to do this is that several
7212      * ddraw applications access surface memory while the surface isn't
7213      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7214      * future locks prevents these from crashing. */
7215     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7216         surface->flags |= SFLAG_DYNLOCK;
7217
7218     /* Mark the texture as dirty so that it gets loaded first time around. */
7219     surface_add_dirty_rect(surface, NULL);
7220     list_init(&surface->renderbuffers);
7221
7222     TRACE("surface %p, memory %p, size %u\n",
7223             surface, surface->resource.allocatedMemory, surface->resource.size);
7224
7225     /* Call the private setup routine */
7226     hr = surface->surface_ops->surface_private_setup(surface);
7227     if (FAILED(hr))
7228     {
7229         ERR("Private setup failed, returning %#x\n", hr);
7230         surface_cleanup(surface);
7231         return hr;
7232     }
7233
7234     /* Similar to lockable rendertargets above, creating the DIB section
7235      * during surface initialization prevents the sysmem pointer from changing
7236      * after a wined3d_surface_getdc() call. */
7237     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7238             && SUCCEEDED(surface_create_dib_section(surface)))
7239     {
7240         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7241         surface->resource.heapMemory = NULL;
7242         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7243     }
7244
7245     return hr;
7246 }
7247
7248 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7249         enum wined3d_format_id format_id, DWORD usage, enum wined3d_pool pool,
7250         enum wined3d_multisample_type multisample_type, DWORD multisample_quality, DWORD flags,
7251         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7252 {
7253     struct wined3d_surface *object;
7254     HRESULT hr;
7255
7256     TRACE("device %p, width %u, height %u, format %s\n",
7257             device, width, height, debug_d3dformat(format_id));
7258     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7259             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7260     TRACE("flags %#x, parent %p, parent_ops %p.\n", flags, parent, parent_ops);
7261
7262     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7263     if (!object)
7264         return WINED3DERR_OUTOFVIDEOMEMORY;
7265
7266     if (FAILED(hr = surface_init(object, device->surface_alignment, width, height, multisample_type,
7267             multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops)))
7268     {
7269         WARN("Failed to initialize surface, returning %#x.\n", hr);
7270         HeapFree(GetProcessHeap(), 0, object);
7271         return hr;
7272     }
7273
7274     TRACE("Created surface %p.\n", object);
7275     *surface = object;
7276
7277     return hr;
7278 }