wined3d: Fix the block alignment check in surface_cpu_blt().
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         enum wined3d_texture_filter_type filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
164 struct blt_info
165 {
166     GLenum binding;
167     GLenum bind_target;
168     enum tex_types tex_type;
169     GLfloat coords[4][3];
170 };
171
/* Floating point rectangle: left, top, right and bottom edges. */
struct float_rect
{
    float l, t, r, b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380
381     TRACE("surface %p.\n", surface);
382
383     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
384     {
385         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
386         return WINED3DERR_INVALIDCALL;
387     }
388
389     switch (format->byte_count)
390     {
391         case 2:
392         case 4:
393             /* Allocate extra space to store the RGB bit masks. */
394             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
395             break;
396
397         case 3:
398             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
399             break;
400
401         default:
402             /* Allocate extra space for a palette. */
403             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
404                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
405             break;
406     }
407
408     if (!b_info)
409         return E_OUTOFMEMORY;
410
411     /* Some applications access the surface in via DWORDs, and do not take
412      * the necessary care at the end of the surface. So we need at least
413      * 4 extra bytes at the end of the surface. Check against the page size,
414      * if the last page used for the surface has at least 4 spare bytes we're
415      * safe, otherwise add an extra line to the DIB section. */
416     GetSystemInfo(&sysInfo);
417     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
418     {
419         extraline = 1;
420         TRACE("Adding an extra line to the DIB section.\n");
421     }
422
423     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
424     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
425     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
426     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
427     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
428             * wined3d_surface_get_pitch(surface);
429     b_info->bmiHeader.biPlanes = 1;
430     b_info->bmiHeader.biBitCount = format->byte_count * 8;
431
432     b_info->bmiHeader.biXPelsPerMeter = 0;
433     b_info->bmiHeader.biYPelsPerMeter = 0;
434     b_info->bmiHeader.biClrUsed = 0;
435     b_info->bmiHeader.biClrImportant = 0;
436
437     /* Get the bit masks */
438     masks = (DWORD *)b_info->bmiColors;
439     switch (surface->resource.format->id)
440     {
441         case WINED3DFMT_B8G8R8_UNORM:
442             b_info->bmiHeader.biCompression = BI_RGB;
443             break;
444
445         case WINED3DFMT_B5G5R5X1_UNORM:
446         case WINED3DFMT_B5G5R5A1_UNORM:
447         case WINED3DFMT_B4G4R4A4_UNORM:
448         case WINED3DFMT_B4G4R4X4_UNORM:
449         case WINED3DFMT_B2G3R3_UNORM:
450         case WINED3DFMT_B2G3R3A8_UNORM:
451         case WINED3DFMT_R10G10B10A2_UNORM:
452         case WINED3DFMT_R8G8B8A8_UNORM:
453         case WINED3DFMT_R8G8B8X8_UNORM:
454         case WINED3DFMT_B10G10R10A2_UNORM:
455         case WINED3DFMT_B5G6R5_UNORM:
456         case WINED3DFMT_R16G16B16A16_UNORM:
457             b_info->bmiHeader.biCompression = BI_BITFIELDS;
458             masks[0] = format->red_mask;
459             masks[1] = format->green_mask;
460             masks[2] = format->blue_mask;
461             break;
462
463         default:
464             /* Don't know palette */
465             b_info->bmiHeader.biCompression = BI_RGB;
466             break;
467     }
468
469     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
470             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
471             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
472     surface->dib.DIBsection = CreateDIBSection(0, b_info, DIB_RGB_COLORS, &surface->dib.bitmap_data, 0, 0);
473
474     if (!surface->dib.DIBsection)
475     {
476         ERR("Failed to create DIB section.\n");
477         HeapFree(GetProcessHeap(), 0, b_info);
478         return HRESULT_FROM_WIN32(GetLastError());
479     }
480
481     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
482     /* Copy the existing surface to the dib section. */
483     if (surface->resource.allocatedMemory)
484     {
485         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
486                 surface->resource.height * wined3d_surface_get_pitch(surface));
487     }
488     else
489     {
490         /* This is to make maps read the GL texture although memory is allocated. */
491         surface->flags &= ~SFLAG_INSYSMEM;
492     }
493     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
494
495     HeapFree(GetProcessHeap(), 0, b_info);
496
497     /* Now allocate a DC. */
498     surface->hDC = CreateCompatibleDC(0);
499     SelectObject(surface->hDC, surface->dib.DIBsection);
500     TRACE("Using wined3d palette %p.\n", surface->palette);
501     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
502
503     surface->flags |= SFLAG_DIBSECTION;
504
505     return WINED3D_OK;
506 }
507
508 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
509 {
510     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
511         return FALSE;
512     if (!(surface->flags & SFLAG_DYNLOCK))
513         return FALSE;
514     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
515         return FALSE;
516     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
517         return FALSE;
518
519     return TRUE;
520 }
521
522 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
523 {
524     struct wined3d_context *context;
525     GLenum error;
526
527     context = context_acquire(surface->resource.device, NULL);
528     ENTER_GL();
529
530     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
531     error = glGetError();
532     if (!surface->pbo || error != GL_NO_ERROR)
533         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
534
535     TRACE("Binding PBO %u.\n", surface->pbo);
536
537     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
538     checkGLcall("glBindBufferARB");
539
540     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
541             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
542     checkGLcall("glBufferDataARB");
543
544     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
545     checkGLcall("glBindBufferARB");
546
547     /* We don't need the system memory anymore and we can't even use it for PBOs. */
548     if (!(surface->flags & SFLAG_CLIENT))
549     {
550         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
551         surface->resource.heapMemory = NULL;
552     }
553     surface->resource.allocatedMemory = NULL;
554     surface->flags |= SFLAG_PBO;
555     LEAVE_GL();
556     context_release(context);
557 }
558
559 static void surface_prepare_system_memory(struct wined3d_surface *surface)
560 {
561     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
562
563     TRACE("surface %p.\n", surface);
564
565     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
566         surface_load_pbo(surface, gl_info);
567     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
568     {
569         /* Whatever surface we have, make sure that there is memory allocated
570          * for the downloaded copy, or a PBO to map. */
571         if (!surface->resource.heapMemory)
572             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
573
574         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
575                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
576
577         if (surface->flags & SFLAG_INSYSMEM)
578             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
579     }
580 }
581
582 static void surface_evict_sysmem(struct wined3d_surface *surface)
583 {
584     if (surface->resource.map_count || (surface->flags & SFLAG_DONOTFREE))
585         return;
586
587     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
588     surface->resource.allocatedMemory = NULL;
589     surface->resource.heapMemory = NULL;
590     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
591 }
592
593 /* Context activation is done by the caller. */
594 static void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
595 {
596     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
597
598     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
599     {
600         struct wined3d_texture *texture = surface->container.u.texture;
601
602         TRACE("Passing to container (%p).\n", texture);
603         texture->texture_ops->texture_bind(texture, context, srgb);
604     }
605     else
606     {
607         if (surface->texture_level)
608         {
609             ERR("Standalone surface %p is non-zero texture level %u.\n",
610                     surface, surface->texture_level);
611         }
612
613         if (srgb)
614             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
615
616         ENTER_GL();
617
618         if (!surface->texture_name)
619         {
620             glGenTextures(1, &surface->texture_name);
621             checkGLcall("glGenTextures");
622
623             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
624
625             context_bind_texture(context, surface->texture_target, surface->texture_name);
626             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
627             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
628             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
629             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
630             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
631             checkGLcall("glTexParameteri");
632         }
633         else
634         {
635             context_bind_texture(context, surface->texture_target, surface->texture_name);
636         }
637
638         LEAVE_GL();
639     }
640 }
641
642 /* Context activation is done by the caller. */
643 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
644         struct wined3d_context *context, BOOL srgb)
645 {
646     struct wined3d_device *device = surface->resource.device;
647     DWORD active_sampler;
648
649     /* We don't need a specific texture unit, but after binding the texture
650      * the current unit is dirty. Read the unit back instead of switching to
651      * 0, this avoids messing around with the state manager's GL states. The
652      * current texture unit should always be a valid one.
653      *
654      * To be more specific, this is tricky because we can implicitly be
655      * called from sampler() in state.c. This means we can't touch anything
656      * other than whatever happens to be the currently active texture, or we
657      * would risk marking already applied sampler states dirty again. */
658     active_sampler = device->rev_tex_unit_map[context->active_texture];
659
660     if (active_sampler != WINED3D_UNMAPPED_STAGE)
661         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
662     surface_bind(surface, context, srgb);
663 }
664
665 static void surface_force_reload(struct wined3d_surface *surface)
666 {
667     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
668 }
669
670 static void surface_release_client_storage(struct wined3d_surface *surface)
671 {
672     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
673
674     ENTER_GL();
675     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
676     if (surface->texture_name)
677     {
678         surface_bind_and_dirtify(surface, context, FALSE);
679         glTexImage2D(surface->texture_target, surface->texture_level,
680                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
681     }
682     if (surface->texture_name_srgb)
683     {
684         surface_bind_and_dirtify(surface, context, TRUE);
685         glTexImage2D(surface->texture_target, surface->texture_level,
686                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
687     }
688     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
689     LEAVE_GL();
690
691     context_release(context);
692
693     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
694     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
695     surface_force_reload(surface);
696 }
697
698 static HRESULT surface_private_setup(struct wined3d_surface *surface)
699 {
700     /* TODO: Check against the maximum texture sizes supported by the video card. */
701     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
702     unsigned int pow2Width, pow2Height;
703
704     TRACE("surface %p.\n", surface);
705
706     surface->texture_name = 0;
707     surface->texture_target = GL_TEXTURE_2D;
708
709     /* Non-power2 support */
710     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
711     {
712         pow2Width = surface->resource.width;
713         pow2Height = surface->resource.height;
714     }
715     else
716     {
717         /* Find the nearest pow2 match */
718         pow2Width = pow2Height = 1;
719         while (pow2Width < surface->resource.width)
720             pow2Width <<= 1;
721         while (pow2Height < surface->resource.height)
722             pow2Height <<= 1;
723     }
724     surface->pow2Width = pow2Width;
725     surface->pow2Height = pow2Height;
726
727     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
728     {
729         /* TODO: Add support for non power two compressed textures. */
730         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
731         {
732             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
733                   surface, surface->resource.width, surface->resource.height);
734             return WINED3DERR_NOTAVAILABLE;
735         }
736     }
737
738     if (pow2Width != surface->resource.width
739             || pow2Height != surface->resource.height)
740     {
741         surface->flags |= SFLAG_NONPOW2;
742     }
743
744     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
745             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
746     {
747         /* One of three options:
748          * 1: Do the same as we do with NPOT and scale the texture, (any
749          *    texture ops would require the texture to be scaled which is
750          *    potentially slow)
751          * 2: Set the texture to the maximum size (bad idea).
752          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
753          * 4: Create the surface, but allow it to be used only for DirectDraw
754          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
755          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
756          *    the render target. */
757         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
758         {
759             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
760             return WINED3DERR_NOTAVAILABLE;
761         }
762
763         /* We should never use this surface in combination with OpenGL! */
764         TRACE("Creating an oversized surface: %ux%u.\n",
765                 surface->pow2Width, surface->pow2Height);
766     }
767     else
768     {
769         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
770          * and EXT_PALETTED_TEXTURE is used in combination with texture
771          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
772          * EXT_PALETTED_TEXTURE doesn't work in combination with
773          * ARB_TEXTURE_RECTANGLE. */
774         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
775                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
776                 && gl_info->supported[EXT_PALETTED_TEXTURE]
777                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
778         {
779             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
780             surface->pow2Width = surface->resource.width;
781             surface->pow2Height = surface->resource.height;
782             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
783         }
784     }
785
786     switch (wined3d_settings.offscreen_rendering_mode)
787     {
788         case ORM_FBO:
789             surface->get_drawable_size = get_drawable_size_fbo;
790             break;
791
792         case ORM_BACKBUFFER:
793             surface->get_drawable_size = get_drawable_size_backbuffer;
794             break;
795
796         default:
797             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
798             return WINED3DERR_INVALIDCALL;
799     }
800
801     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
802         surface->flags |= SFLAG_DISCARDED;
803
804     return WINED3D_OK;
805 }
806
807 static void surface_realize_palette(struct wined3d_surface *surface)
808 {
809     struct wined3d_palette *palette = surface->palette;
810
811     TRACE("surface %p.\n", surface);
812
813     if (!palette) return;
814
815     if (surface->resource.format->id == WINED3DFMT_P8_UINT
816             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
817     {
818         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
819         {
820             /* Make sure the texture is up to date. This call doesn't do
821              * anything if the texture is already up to date. */
822             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
823
824             /* We want to force a palette refresh, so mark the drawable as not being up to date */
825             if (!surface_is_offscreen(surface))
826                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
827         }
828         else
829         {
830             if (!(surface->flags & SFLAG_INSYSMEM))
831             {
832                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
833                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
834             }
835             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
836         }
837     }
838
839     if (surface->flags & SFLAG_DIBSECTION)
840     {
841         RGBQUAD col[256];
842         unsigned int i;
843
844         TRACE("Updating the DC's palette.\n");
845
846         for (i = 0; i < 256; ++i)
847         {
848             col[i].rgbRed   = palette->palents[i].peRed;
849             col[i].rgbGreen = palette->palents[i].peGreen;
850             col[i].rgbBlue  = palette->palents[i].peBlue;
851             col[i].rgbReserved = 0;
852         }
853         SetDIBColorTable(surface->hDC, 0, 256, col);
854     }
855
856     /* Propagate the changes to the drawable when we have a palette. */
857     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
858         surface_load_location(surface, surface->draw_binding, NULL);
859 }
860
861 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
862 {
863     HRESULT hr;
864
865     /* If there's no destination surface there is nothing to do. */
866     if (!surface->overlay_dest)
867         return WINED3D_OK;
868
869     /* Blt calls ModifyLocation on the dest surface, which in turn calls
870      * DrawOverlay to update the overlay. Prevent an endless recursion. */
871     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
872         return WINED3D_OK;
873
874     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
875     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
876             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
877     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
878
879     return hr;
880 }
881
882 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
883 {
884     struct wined3d_device *device = surface->resource.device;
885     const RECT *pass_rect = rect;
886
887     TRACE("surface %p, rect %s, flags %#x.\n",
888             surface, wine_dbgstr_rect(rect), flags);
889
890     if (flags & WINED3D_MAP_DISCARD)
891     {
892         TRACE("WINED3D_MAP_DISCARD flag passed, marking SYSMEM as up to date.\n");
893         surface_prepare_system_memory(surface);
894         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
895     }
896     else
897     {
898         /* surface_load_location() does not check if the rectangle specifies
899          * the full surface. Most callers don't need that, so do it here. */
900         if (rect && !rect->top && !rect->left
901                 && rect->right == surface->resource.width
902                 && rect->bottom == surface->resource.height)
903             pass_rect = NULL;
904         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
905     }
906
907     if (surface->flags & SFLAG_PBO)
908     {
909         const struct wined3d_gl_info *gl_info;
910         struct wined3d_context *context;
911
912         context = context_acquire(device, NULL);
913         gl_info = context->gl_info;
914
915         ENTER_GL();
916         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
917         checkGLcall("glBindBufferARB");
918
919         /* This shouldn't happen but could occur if some other function
920          * didn't handle the PBO properly. */
921         if (surface->resource.allocatedMemory)
922             ERR("The surface already has PBO memory allocated.\n");
923
924         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
925         checkGLcall("glMapBufferARB");
926
927         /* Make sure the PBO isn't set anymore in order not to break non-PBO
928          * calls. */
929         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
930         checkGLcall("glBindBufferARB");
931
932         LEAVE_GL();
933         context_release(context);
934     }
935
936     if (!(flags & (WINED3D_MAP_NO_DIRTY_UPDATE | WINED3D_MAP_READONLY)))
937     {
938         if (!rect)
939             surface_add_dirty_rect(surface, NULL);
940         else
941         {
942             struct wined3d_box b;
943
944             b.left = rect->left;
945             b.top = rect->top;
946             b.right = rect->right;
947             b.bottom = rect->bottom;
948             b.front = 0;
949             b.back = 1;
950             surface_add_dirty_rect(surface, &b);
951         }
952     }
953 }
954
955 static void surface_unmap(struct wined3d_surface *surface)
956 {
957     struct wined3d_device *device = surface->resource.device;
958     BOOL fullsurface;
959
960     TRACE("surface %p.\n", surface);
961
962     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
963
964     if (surface->flags & SFLAG_PBO)
965     {
966         const struct wined3d_gl_info *gl_info;
967         struct wined3d_context *context;
968
969         TRACE("Freeing PBO memory.\n");
970
971         context = context_acquire(device, NULL);
972         gl_info = context->gl_info;
973
974         ENTER_GL();
975         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
976         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
977         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
978         checkGLcall("glUnmapBufferARB");
979         LEAVE_GL();
980         context_release(context);
981
982         surface->resource.allocatedMemory = NULL;
983     }
984
985     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
986
987     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
988     {
989         TRACE("Not dirtified, nothing to do.\n");
990         goto done;
991     }
992
993     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
994             && surface->container.u.swapchain->front_buffer == surface)
995     {
996         if (!surface->dirtyRect.left && !surface->dirtyRect.top
997                 && surface->dirtyRect.right == surface->resource.width
998                 && surface->dirtyRect.bottom == surface->resource.height)
999         {
1000             fullsurface = TRUE;
1001         }
1002         else
1003         {
1004             /* TODO: Proper partial rectangle tracking. */
1005             fullsurface = FALSE;
1006             surface->flags |= SFLAG_INSYSMEM;
1007         }
1008
1009         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
1010
1011         /* Partial rectangle tracking is not commonly implemented, it is only
1012          * done for render targets. INSYSMEM was set before to tell
1013          * surface_load_location() where to read the rectangle from.
1014          * Indrawable is set because all modifications from the partial
1015          * sysmem copy are written back to the drawable, thus the surface is
1016          * merged again in the drawable. The sysmem copy is not fully up to
1017          * date because only a subrectangle was read in Map(). */
1018         if (!fullsurface)
1019         {
1020             surface_modify_location(surface, surface->draw_binding, TRUE);
1021             surface_evict_sysmem(surface);
1022         }
1023
1024         surface->dirtyRect.left = surface->resource.width;
1025         surface->dirtyRect.top = surface->resource.height;
1026         surface->dirtyRect.right = 0;
1027         surface->dirtyRect.bottom = 0;
1028     }
1029     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1030     {
1031         FIXME("Depth / stencil buffer locking is not implemented.\n");
1032     }
1033
1034 done:
1035     /* Overlays have to be redrawn manually after changes with the GL implementation */
1036     if (surface->overlay_dest)
1037         surface_draw_overlay(surface);
1038 }
1039
1040 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1041 {
1042     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1043         return FALSE;
1044     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1045         return FALSE;
1046     return TRUE;
1047 }
1048
1049 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1050         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1051 {
1052     const struct wined3d_gl_info *gl_info;
1053     struct wined3d_context *context;
1054     DWORD src_mask, dst_mask;
1055     GLbitfield gl_mask;
1056
1057     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1058             device, src_surface, wine_dbgstr_rect(src_rect),
1059             dst_surface, wine_dbgstr_rect(dst_rect));
1060
1061     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1062     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1063
1064     if (src_mask != dst_mask)
1065     {
1066         ERR("Incompatible formats %s and %s.\n",
1067                 debug_d3dformat(src_surface->resource.format->id),
1068                 debug_d3dformat(dst_surface->resource.format->id));
1069         return;
1070     }
1071
1072     if (!src_mask)
1073     {
1074         ERR("Not a depth / stencil format: %s.\n",
1075                 debug_d3dformat(src_surface->resource.format->id));
1076         return;
1077     }
1078
1079     gl_mask = 0;
1080     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1081         gl_mask |= GL_DEPTH_BUFFER_BIT;
1082     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1083         gl_mask |= GL_STENCIL_BUFFER_BIT;
1084
1085     /* Make sure the locations are up-to-date. Loading the destination
1086      * surface isn't required if the entire surface is overwritten. */
1087     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1088     if (!surface_is_full_rect(dst_surface, dst_rect))
1089         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1090
1091     context = context_acquire(device, NULL);
1092     if (!context->valid)
1093     {
1094         context_release(context);
1095         WARN("Invalid context, skipping blit.\n");
1096         return;
1097     }
1098
1099     gl_info = context->gl_info;
1100
1101     ENTER_GL();
1102
1103     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1104     glReadBuffer(GL_NONE);
1105     checkGLcall("glReadBuffer()");
1106     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1107
1108     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1109     context_set_draw_buffer(context, GL_NONE);
1110     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1111     context_invalidate_state(context, STATE_FRAMEBUFFER);
1112
1113     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1114     {
1115         glDepthMask(GL_TRUE);
1116         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1117     }
1118     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1119     {
1120         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1121         {
1122             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1123             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1124         }
1125         glStencilMask(~0U);
1126         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1127     }
1128
1129     glDisable(GL_SCISSOR_TEST);
1130     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1131
1132     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1133             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1134     checkGLcall("glBlitFramebuffer()");
1135
1136     LEAVE_GL();
1137
1138     if (wined3d_settings.strict_draw_ordering)
1139         wglFlush(); /* Flush to ensure ordering across contexts. */
1140
1141     context_release(context);
1142 }
1143
1144 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1145  * Depth / stencil is not supported. */
1146 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1147         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1148         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1149 {
1150     const struct wined3d_gl_info *gl_info;
1151     struct wined3d_context *context;
1152     RECT src_rect, dst_rect;
1153     GLenum gl_filter;
1154     GLenum buffer;
1155
1156     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1157     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1158             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1159     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1160             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1161
1162     src_rect = *src_rect_in;
1163     dst_rect = *dst_rect_in;
1164
1165     switch (filter)
1166     {
1167         case WINED3D_TEXF_LINEAR:
1168             gl_filter = GL_LINEAR;
1169             break;
1170
1171         default:
1172             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1173         case WINED3D_TEXF_NONE:
1174         case WINED3D_TEXF_POINT:
1175             gl_filter = GL_NEAREST;
1176             break;
1177     }
1178
1179     /* Resolve the source surface first if needed. */
1180     if (src_location == SFLAG_INRB_MULTISAMPLE
1181             && (src_surface->resource.format->id != dst_surface->resource.format->id
1182                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1183                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1184         src_location = SFLAG_INRB_RESOLVED;
1185
1186     /* Make sure the locations are up-to-date. Loading the destination
1187      * surface isn't required if the entire surface is overwritten. (And is
1188      * in fact harmful if we're being called by surface_load_location() with
1189      * the purpose of loading the destination surface.) */
1190     surface_load_location(src_surface, src_location, NULL);
1191     if (!surface_is_full_rect(dst_surface, &dst_rect))
1192         surface_load_location(dst_surface, dst_location, NULL);
1193
1194     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1195     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1196     else context = context_acquire(device, NULL);
1197
1198     if (!context->valid)
1199     {
1200         context_release(context);
1201         WARN("Invalid context, skipping blit.\n");
1202         return;
1203     }
1204
1205     gl_info = context->gl_info;
1206
1207     if (src_location == SFLAG_INDRAWABLE)
1208     {
1209         TRACE("Source surface %p is onscreen.\n", src_surface);
1210         buffer = surface_get_gl_buffer(src_surface);
1211         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1212     }
1213     else
1214     {
1215         TRACE("Source surface %p is offscreen.\n", src_surface);
1216         buffer = GL_COLOR_ATTACHMENT0;
1217     }
1218
1219     ENTER_GL();
1220     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1221     glReadBuffer(buffer);
1222     checkGLcall("glReadBuffer()");
1223     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1224     LEAVE_GL();
1225
1226     if (dst_location == SFLAG_INDRAWABLE)
1227     {
1228         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1229         buffer = surface_get_gl_buffer(dst_surface);
1230         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1231     }
1232     else
1233     {
1234         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1235         buffer = GL_COLOR_ATTACHMENT0;
1236     }
1237
1238     ENTER_GL();
1239     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1240     context_set_draw_buffer(context, buffer);
1241     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1242     context_invalidate_state(context, STATE_FRAMEBUFFER);
1243
1244     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1245     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1246     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1247     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1248     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1249
1250     glDisable(GL_SCISSOR_TEST);
1251     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1252
1253     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1254             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1255     checkGLcall("glBlitFramebuffer()");
1256
1257     LEAVE_GL();
1258
1259     if (wined3d_settings.strict_draw_ordering
1260             || (dst_location == SFLAG_INDRAWABLE
1261             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1262         wglFlush();
1263
1264     context_release(context);
1265 }
1266
1267 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1268         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1269         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1270 {
1271     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1272         return FALSE;
1273
1274     /* Source and/or destination need to be on the GL side */
1275     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1276         return FALSE;
1277
1278     switch (blit_op)
1279     {
1280         case WINED3D_BLIT_OP_COLOR_BLIT:
1281             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1282                 return FALSE;
1283             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1284                 return FALSE;
1285             break;
1286
1287         case WINED3D_BLIT_OP_DEPTH_BLIT:
1288             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1289                 return FALSE;
1290             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1291                 return FALSE;
1292             break;
1293
1294         default:
1295             return FALSE;
1296     }
1297
1298     if (!(src_format->id == dst_format->id
1299             || (is_identity_fixup(src_format->color_fixup)
1300             && is_identity_fixup(dst_format->color_fixup))))
1301         return FALSE;
1302
1303     return TRUE;
1304 }
1305
1306 /* This function checks if the primary render target uses the 8bit paletted format. */
1307 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1308 {
1309     if (device->fb.render_targets && device->fb.render_targets[0])
1310     {
1311         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1312         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1313                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1314             return TRUE;
1315     }
1316     return FALSE;
1317 }
1318
1319 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1320         DWORD color, struct wined3d_color *float_color)
1321 {
1322     const struct wined3d_format *format = surface->resource.format;
1323     const struct wined3d_device *device = surface->resource.device;
1324
1325     switch (format->id)
1326     {
1327         case WINED3DFMT_P8_UINT:
1328             if (surface->palette)
1329             {
1330                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1331                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1332                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1333             }
1334             else
1335             {
1336                 float_color->r = 0.0f;
1337                 float_color->g = 0.0f;
1338                 float_color->b = 0.0f;
1339             }
1340             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1341             break;
1342
1343         case WINED3DFMT_B5G6R5_UNORM:
1344             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1345             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1346             float_color->b = (color & 0x1f) / 31.0f;
1347             float_color->a = 1.0f;
1348             break;
1349
1350         case WINED3DFMT_B8G8R8_UNORM:
1351         case WINED3DFMT_B8G8R8X8_UNORM:
1352             float_color->r = D3DCOLOR_R(color);
1353             float_color->g = D3DCOLOR_G(color);
1354             float_color->b = D3DCOLOR_B(color);
1355             float_color->a = 1.0f;
1356             break;
1357
1358         case WINED3DFMT_B8G8R8A8_UNORM:
1359             float_color->r = D3DCOLOR_R(color);
1360             float_color->g = D3DCOLOR_G(color);
1361             float_color->b = D3DCOLOR_B(color);
1362             float_color->a = D3DCOLOR_A(color);
1363             break;
1364
1365         default:
1366             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1367             return FALSE;
1368     }
1369
1370     return TRUE;
1371 }
1372
1373 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1374 {
1375     const struct wined3d_format *format = surface->resource.format;
1376
1377     switch (format->id)
1378     {
1379         case WINED3DFMT_S1_UINT_D15_UNORM:
1380             *float_depth = depth / (float)0x00007fff;
1381             break;
1382
1383         case WINED3DFMT_D16_UNORM:
1384             *float_depth = depth / (float)0x0000ffff;
1385             break;
1386
1387         case WINED3DFMT_D24_UNORM_S8_UINT:
1388         case WINED3DFMT_X8D24_UNORM:
1389             *float_depth = depth / (float)0x00ffffff;
1390             break;
1391
1392         case WINED3DFMT_D32_UNORM:
1393             *float_depth = depth / (float)0xffffffff;
1394             break;
1395
1396         default:
1397             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1398             return FALSE;
1399     }
1400
1401     return TRUE;
1402 }
1403
1404 /* Do not call while under the GL lock. */
1405 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1406 {
1407     const struct wined3d_resource *resource = &surface->resource;
1408     struct wined3d_device *device = resource->device;
1409     const struct blit_shader *blitter;
1410
1411     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1412             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1413     if (!blitter)
1414     {
1415         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1416         return WINED3DERR_INVALIDCALL;
1417     }
1418
1419     return blitter->depth_fill(device, surface, rect, depth);
1420 }
1421
1422 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1423         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1424 {
1425     struct wined3d_device *device = src_surface->resource.device;
1426
1427     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1428             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1429             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1430         return WINED3DERR_INVALIDCALL;
1431
1432     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1433
1434     surface_modify_ds_location(dst_surface, SFLAG_INTEXTURE,
1435             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1436
1437     return WINED3D_OK;
1438 }
1439
1440 /* Do not call while under the GL lock. */
1441 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1442         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1443         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1444 {
1445     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1446     struct wined3d_device *device = dst_surface->resource.device;
1447     DWORD src_ds_flags, dst_ds_flags;
1448     RECT src_rect, dst_rect;
1449     BOOL scale, convert;
1450
1451     static const DWORD simple_blit = WINEDDBLT_ASYNC
1452             | WINEDDBLT_COLORFILL
1453             | WINEDDBLT_WAIT
1454             | WINEDDBLT_DEPTHFILL
1455             | WINEDDBLT_DONOTWAIT;
1456
1457     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1458             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1459             flags, fx, debug_d3dtexturefiltertype(filter));
1460     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1461
1462     if (fx)
1463     {
1464         TRACE("dwSize %#x.\n", fx->dwSize);
1465         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1466         TRACE("dwROP %#x.\n", fx->dwROP);
1467         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1468         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1469         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1470         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1471         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1472         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1473         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1474         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1475         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1476         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1477         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1478         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1479         TRACE("dwReserved %#x.\n", fx->dwReserved);
1480         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1481         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1482         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1483         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1484         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1485         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1486                 fx->ddckDestColorkey.color_space_low_value,
1487                 fx->ddckDestColorkey.color_space_high_value);
1488         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1489                 fx->ddckSrcColorkey.color_space_low_value,
1490                 fx->ddckSrcColorkey.color_space_high_value);
1491     }
1492
1493     if (dst_surface->resource.map_count || (src_surface && src_surface->resource.map_count))
1494     {
1495         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1496         return WINEDDERR_SURFACEBUSY;
1497     }
1498
1499     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1500
1501     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1502             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1503             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1504             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1505             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1506     {
1507         WARN("The application gave us a bad destination rectangle.\n");
1508         return WINEDDERR_INVALIDRECT;
1509     }
1510
1511     if (src_surface)
1512     {
1513         surface_get_rect(src_surface, src_rect_in, &src_rect);
1514
1515         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1516                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1517                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1518                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1519                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1520         {
1521             WARN("Application gave us bad source rectangle for Blt.\n");
1522             return WINEDDERR_INVALIDRECT;
1523         }
1524     }
1525     else
1526     {
1527         memset(&src_rect, 0, sizeof(src_rect));
1528     }
1529
1530     if (!fx || !(fx->dwDDFX))
1531         flags &= ~WINEDDBLT_DDFX;
1532
1533     if (flags & WINEDDBLT_WAIT)
1534         flags &= ~WINEDDBLT_WAIT;
1535
1536     if (flags & WINEDDBLT_ASYNC)
1537     {
1538         static unsigned int once;
1539
1540         if (!once++)
1541             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1542         flags &= ~WINEDDBLT_ASYNC;
1543     }
1544
1545     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1546     if (flags & WINEDDBLT_DONOTWAIT)
1547     {
1548         static unsigned int once;
1549
1550         if (!once++)
1551             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1552         flags &= ~WINEDDBLT_DONOTWAIT;
1553     }
1554
1555     if (!device->d3d_initialized)
1556     {
1557         WARN("D3D not initialized, using fallback.\n");
1558         goto cpu;
1559     }
1560
1561     /* We want to avoid invalidating the sysmem location for converted
1562      * surfaces, since otherwise we'd have to convert the data back when
1563      * locking them. */
1564     if (dst_surface->flags & SFLAG_CONVERTED)
1565     {
1566         WARN("Converted surface, using CPU blit.\n");
1567         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1568     }
1569
1570     if (flags & ~simple_blit)
1571     {
1572         WARN("Using fallback for complex blit (%#x).\n", flags);
1573         goto fallback;
1574     }
1575
1576     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1577         src_swapchain = src_surface->container.u.swapchain;
1578     else
1579         src_swapchain = NULL;
1580
1581     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1582         dst_swapchain = dst_surface->container.u.swapchain;
1583     else
1584         dst_swapchain = NULL;
1585
1586     /* This isn't strictly needed. FBO blits for example could deal with
1587      * cross-swapchain blits by first downloading the source to a texture
1588      * before switching to the destination context. We just have this here to
1589      * not have to deal with the issue, since cross-swapchain blits should be
1590      * rare. */
1591     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1592     {
1593         FIXME("Using fallback for cross-swapchain blit.\n");
1594         goto fallback;
1595     }
1596
1597     scale = src_surface
1598             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1599             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1600     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1601
1602     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1603     if (src_surface)
1604         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1605     else
1606         src_ds_flags = 0;
1607
1608     if (src_ds_flags || dst_ds_flags)
1609     {
1610         if (flags & WINEDDBLT_DEPTHFILL)
1611         {
1612             float depth;
1613
1614             TRACE("Depth fill.\n");
1615
1616             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1617                 return WINED3DERR_INVALIDCALL;
1618
1619             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1620                 return WINED3D_OK;
1621         }
1622         else
1623         {
1624             if (src_ds_flags != dst_ds_flags)
1625             {
1626                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1627                 return WINED3DERR_INVALIDCALL;
1628             }
1629
1630             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1631                 return WINED3D_OK;
1632         }
1633     }
1634     else
1635     {
1636         /* In principle this would apply to depth blits as well, but we don't
1637          * implement those in the CPU blitter at the moment. */
1638         if ((dst_surface->flags & SFLAG_INSYSMEM)
1639                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1640         {
1641             if (scale)
1642                 TRACE("Not doing sysmem blit because of scaling.\n");
1643             else if (convert)
1644                 TRACE("Not doing sysmem blit because of format conversion.\n");
1645             else
1646                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1647         }
1648
1649         if (flags & WINEDDBLT_COLORFILL)
1650         {
1651             struct wined3d_color color;
1652
1653             TRACE("Color fill.\n");
1654
1655             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1656                 goto fallback;
1657
1658             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1659                 return WINED3D_OK;
1660         }
1661         else
1662         {
1663             TRACE("Color blit.\n");
1664
1665             /* Upload */
1666             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1667             {
1668                 if (scale)
1669                     TRACE("Not doing upload because of scaling.\n");
1670                 else if (convert)
1671                     TRACE("Not doing upload because of format conversion.\n");
1672                 else
1673                 {
1674                     POINT dst_point = {dst_rect.left, dst_rect.top};
1675
1676                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1677                     {
1678                         if (!surface_is_offscreen(dst_surface))
1679                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1680                         return WINED3D_OK;
1681                     }
1682                 }
1683             }
1684
1685             /* Use present for back -> front blits. The idea behind this is
1686              * that present is potentially faster than a blit, in particular
1687              * when FBO blits aren't available. Some ddraw applications like
1688              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1689              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1690              * applications can't blit directly to the frontbuffer. */
1691             if (dst_swapchain && dst_swapchain->back_buffers
1692                     && dst_surface == dst_swapchain->front_buffer
1693                     && src_surface == dst_swapchain->back_buffers[0])
1694             {
1695                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1696
1697                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1698
1699                 /* Set the swap effect to COPY, we don't want the backbuffer
1700                  * to become undefined. */
1701                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1702                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1703                 dst_swapchain->desc.swap_effect = swap_effect;
1704
1705                 return WINED3D_OK;
1706             }
1707
1708             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1709                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1710                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1711             {
1712                 TRACE("Using FBO blit.\n");
1713
1714                 surface_blt_fbo(device, filter,
1715                         src_surface, src_surface->draw_binding, &src_rect,
1716                         dst_surface, dst_surface->draw_binding, &dst_rect);
1717                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1718                 return WINED3D_OK;
1719             }
1720
1721             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1722                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1723                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1724             {
1725                 TRACE("Using arbfp blit.\n");
1726
1727                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1728                     return WINED3D_OK;
1729             }
1730         }
1731     }
1732
1733 fallback:
1734
1735     /* Special cases for render targets. */
1736     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1737             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1738     {
1739         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1740                 src_surface, &src_rect, flags, fx, filter)))
1741             return WINED3D_OK;
1742     }
1743
1744 cpu:
1745
1746     /* For the rest call the X11 surface implementation. For render targets
1747      * this should be implemented OpenGL accelerated in BltOverride, other
1748      * blits are rather rare. */
1749     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1750 }
1751
1752 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1753         struct wined3d_surface *render_target)
1754 {
1755     TRACE("surface %p, render_target %p.\n", surface, render_target);
1756
1757     /* TODO: Check surface sizes, pools, etc. */
1758
1759     if (render_target->resource.multisample_type)
1760         return WINED3DERR_INVALIDCALL;
1761
1762     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1763 }
1764
1765 /* Context activation is done by the caller. */
1766 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1767 {
1768     if (surface->flags & SFLAG_DIBSECTION)
1769     {
1770         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1771     }
1772     else
1773     {
1774         if (!surface->resource.heapMemory)
1775             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1776         else if (!(surface->flags & SFLAG_CLIENT))
1777             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1778                     surface, surface->resource.heapMemory, surface->flags);
1779
1780         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1781                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1782     }
1783
1784     ENTER_GL();
1785     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1786     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1787     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1788             surface->resource.size, surface->resource.allocatedMemory));
1789     checkGLcall("glGetBufferSubDataARB");
1790     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1791     checkGLcall("glDeleteBuffersARB");
1792     LEAVE_GL();
1793
1794     surface->pbo = 0;
1795     surface->flags &= ~SFLAG_PBO;
1796 }
1797
1798 static BOOL surface_init_sysmem(struct wined3d_surface *surface)
1799 {
1800     if (!surface->resource.allocatedMemory)
1801     {
1802         if (!surface->resource.heapMemory)
1803         {
1804             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
1805                     surface->resource.size + RESOURCE_ALIGNMENT)))
1806             {
1807                 ERR("Failed to allocate memory.\n");
1808                 return FALSE;
1809             }
1810         }
1811         else if (!(surface->flags & SFLAG_CLIENT))
1812         {
1813             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1814                     surface, surface->resource.heapMemory, surface->flags);
1815         }
1816
1817         surface->resource.allocatedMemory =
1818             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1819     }
1820     else
1821     {
1822         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
1823     }
1824
1825     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1826
1827     return TRUE;
1828 }
1829
1830 /* Do not call while under the GL lock. */
1831 static void surface_unload(struct wined3d_resource *resource)
1832 {
1833     struct wined3d_surface *surface = surface_from_resource(resource);
1834     struct wined3d_renderbuffer_entry *entry, *entry2;
1835     struct wined3d_device *device = resource->device;
1836     const struct wined3d_gl_info *gl_info;
1837     struct wined3d_context *context;
1838
1839     TRACE("surface %p.\n", surface);
1840
1841     if (resource->pool == WINED3D_POOL_DEFAULT)
1842     {
1843         /* Default pool resources are supposed to be destroyed before Reset is called.
1844          * Implicit resources stay however. So this means we have an implicit render target
1845          * or depth stencil. The content may be destroyed, but we still have to tear down
1846          * opengl resources, so we cannot leave early.
1847          *
1848          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1849          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1850          * or the depth stencil into an FBO the texture or render buffer will be removed
1851          * and all flags get lost
1852          */
1853         if (!(surface->flags & SFLAG_PBO))
1854             surface_init_sysmem(surface);
1855         /* We also get here when the ddraw swapchain is destroyed, for example
1856          * for a mode switch. In this case this surface won't necessarily be
1857          * an implicit surface. We have to mark it lost so that the
1858          * application can restore it after the mode switch. */
1859         surface->flags |= SFLAG_LOST;
1860     }
1861     else
1862     {
1863         /* Load the surface into system memory */
1864         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1865         surface_modify_location(surface, surface->draw_binding, FALSE);
1866     }
1867     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1868     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1869     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1870
1871     context = context_acquire(device, NULL);
1872     gl_info = context->gl_info;
1873
1874     /* Destroy PBOs, but load them into real sysmem before */
1875     if (surface->flags & SFLAG_PBO)
1876         surface_remove_pbo(surface, gl_info);
1877
1878     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1879      * all application-created targets the application has to release the surface
1880      * before calling _Reset
1881      */
1882     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1883     {
1884         ENTER_GL();
1885         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1886         LEAVE_GL();
1887         list_remove(&entry->entry);
1888         HeapFree(GetProcessHeap(), 0, entry);
1889     }
1890     list_init(&surface->renderbuffers);
1891     surface->current_renderbuffer = NULL;
1892
1893     ENTER_GL();
1894
1895     /* If we're in a texture, the texture name belongs to the texture.
1896      * Otherwise, destroy it. */
1897     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1898     {
1899         glDeleteTextures(1, &surface->texture_name);
1900         surface->texture_name = 0;
1901         glDeleteTextures(1, &surface->texture_name_srgb);
1902         surface->texture_name_srgb = 0;
1903     }
1904     if (surface->rb_multisample)
1905     {
1906         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1907         surface->rb_multisample = 0;
1908     }
1909     if (surface->rb_resolved)
1910     {
1911         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1912         surface->rb_resolved = 0;
1913     }
1914
1915     LEAVE_GL();
1916
1917     context_release(context);
1918
1919     resource_unload(resource);
1920 }
1921
1922 static const struct wined3d_resource_ops surface_resource_ops =
1923 {
1924     surface_unload,
1925 };
1926
1927 static const struct wined3d_surface_ops surface_ops =
1928 {
1929     surface_private_setup,
1930     surface_realize_palette,
1931     surface_map,
1932     surface_unmap,
1933 };
1934
1935 /*****************************************************************************
1936  * Initializes the GDI surface, aka creates the DIB section we render to
1937  * The DIB section creation is done by calling GetDC, which will create the
1938  * section and releasing the dc to allow the app to use it. The dib section
1939  * will stay until the surface is released
1940  *
1941  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1942  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1943  * avoid confusion in the shared surface code.
1944  *
1945  * Returns:
1946  *  WINED3D_OK on success
1947  *  The return values of called methods on failure
1948  *
1949  *****************************************************************************/
1950 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1951 {
1952     HRESULT hr;
1953
1954     TRACE("surface %p.\n", surface);
1955
1956     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1957     {
1958         ERR("Overlays not yet supported by GDI surfaces.\n");
1959         return WINED3DERR_INVALIDCALL;
1960     }
1961
1962     /* Sysmem textures have memory already allocated - release it,
1963      * this avoids an unnecessary memcpy. */
1964     hr = surface_create_dib_section(surface);
1965     if (SUCCEEDED(hr))
1966     {
1967         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1968         surface->resource.heapMemory = NULL;
1969         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1970     }
1971
1972     /* We don't mind the nonpow2 stuff in GDI. */
1973     surface->pow2Width = surface->resource.width;
1974     surface->pow2Height = surface->resource.height;
1975
1976     return WINED3D_OK;
1977 }
1978
1979 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1980 {
1981     struct wined3d_palette *palette = surface->palette;
1982
1983     TRACE("surface %p.\n", surface);
1984
1985     if (!palette) return;
1986
1987     if (surface->flags & SFLAG_DIBSECTION)
1988     {
1989         RGBQUAD col[256];
1990         unsigned int i;
1991
1992         TRACE("Updating the DC's palette.\n");
1993
1994         for (i = 0; i < 256; ++i)
1995         {
1996             col[i].rgbRed = palette->palents[i].peRed;
1997             col[i].rgbGreen = palette->palents[i].peGreen;
1998             col[i].rgbBlue = palette->palents[i].peBlue;
1999             col[i].rgbReserved = 0;
2000         }
2001         SetDIBColorTable(surface->hDC, 0, 256, col);
2002     }
2003
2004     /* Update the image because of the palette change. Some games like e.g.
2005      * Red Alert call SetEntries a lot to implement fading. */
2006     /* Tell the swapchain to update the screen. */
2007     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2008     {
2009         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2010         if (surface == swapchain->front_buffer)
2011         {
2012             x11_copy_to_screen(swapchain, NULL);
2013         }
2014     }
2015 }
2016
2017 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2018 {
2019     TRACE("surface %p, rect %s, flags %#x.\n",
2020             surface, wine_dbgstr_rect(rect), flags);
2021
2022     if (!(surface->flags & SFLAG_DIBSECTION))
2023     {
2024         HRESULT hr;
2025
2026         /* This happens on gdi surfaces if the application set a user pointer
2027          * and resets it. Recreate the DIB section. */
2028         if (FAILED(hr = surface_create_dib_section(surface)))
2029         {
2030             ERR("Failed to create dib section, hr %#x.\n", hr);
2031             return;
2032         }
2033         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
2034         surface->resource.heapMemory = NULL;
2035         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2036     }
2037 }
2038
2039 static void gdi_surface_unmap(struct wined3d_surface *surface)
2040 {
2041     TRACE("surface %p.\n", surface);
2042
2043     /* Tell the swapchain to update the screen. */
2044     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2045     {
2046         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2047         if (surface == swapchain->front_buffer)
2048         {
2049             x11_copy_to_screen(swapchain, &surface->lockedRect);
2050         }
2051     }
2052
2053     memset(&surface->lockedRect, 0, sizeof(RECT));
2054 }
2055
2056 static const struct wined3d_surface_ops gdi_surface_ops =
2057 {
2058     gdi_surface_private_setup,
2059     gdi_surface_realize_palette,
2060     gdi_surface_map,
2061     gdi_surface_unmap,
2062 };
2063
2064 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2065 {
2066     GLuint *name;
2067     DWORD flag;
2068
2069     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2070
2071     if(srgb)
2072     {
2073         name = &surface->texture_name_srgb;
2074         flag = SFLAG_INSRGBTEX;
2075     }
2076     else
2077     {
2078         name = &surface->texture_name;
2079         flag = SFLAG_INTEXTURE;
2080     }
2081
2082     if (!*name && new_name)
2083     {
2084         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2085          * surface has no texture name yet. See if we can get rid of this. */
2086         if (surface->flags & flag)
2087         {
2088             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2089             surface_modify_location(surface, flag, FALSE);
2090         }
2091     }
2092
2093     *name = new_name;
2094     surface_force_reload(surface);
2095 }
2096
2097 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2098 {
2099     TRACE("surface %p, target %#x.\n", surface, target);
2100
2101     if (surface->texture_target != target)
2102     {
2103         if (target == GL_TEXTURE_RECTANGLE_ARB)
2104         {
2105             surface->flags &= ~SFLAG_NORMCOORD;
2106         }
2107         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2108         {
2109             surface->flags |= SFLAG_NORMCOORD;
2110         }
2111     }
2112     surface->texture_target = target;
2113     surface_force_reload(surface);
2114 }
2115
2116 /* This call just downloads data, the caller is responsible for binding the
2117  * correct texture. */
2118 /* Context activation is done by the caller. */
2119 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2120 {
2121     const struct wined3d_format *format = surface->resource.format;
2122
2123     /* Only support read back of converted P8 surfaces. */
2124     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2125     {
2126         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2127         return;
2128     }
2129
2130     ENTER_GL();
2131
2132     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2133     {
2134         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2135                 surface, surface->texture_level, format->glFormat, format->glType,
2136                 surface->resource.allocatedMemory);
2137
2138         if (surface->flags & SFLAG_PBO)
2139         {
2140             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2141             checkGLcall("glBindBufferARB");
2142             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2143             checkGLcall("glGetCompressedTexImageARB");
2144             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2145             checkGLcall("glBindBufferARB");
2146         }
2147         else
2148         {
2149             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2150                     surface->texture_level, surface->resource.allocatedMemory));
2151             checkGLcall("glGetCompressedTexImageARB");
2152         }
2153
2154         LEAVE_GL();
2155     }
2156     else
2157     {
2158         void *mem;
2159         GLenum gl_format = format->glFormat;
2160         GLenum gl_type = format->glType;
2161         int src_pitch = 0;
2162         int dst_pitch = 0;
2163
2164         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2165         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2166         {
2167             gl_format = GL_ALPHA;
2168             gl_type = GL_UNSIGNED_BYTE;
2169         }
2170
2171         if (surface->flags & SFLAG_NONPOW2)
2172         {
2173             unsigned char alignment = surface->resource.device->surface_alignment;
2174             src_pitch = format->byte_count * surface->pow2Width;
2175             dst_pitch = wined3d_surface_get_pitch(surface);
2176             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2177             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2178         }
2179         else
2180         {
2181             mem = surface->resource.allocatedMemory;
2182         }
2183
2184         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2185                 surface, surface->texture_level, gl_format, gl_type, mem);
2186
2187         if (surface->flags & SFLAG_PBO)
2188         {
2189             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2190             checkGLcall("glBindBufferARB");
2191
2192             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2193             checkGLcall("glGetTexImage");
2194
2195             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2196             checkGLcall("glBindBufferARB");
2197         }
2198         else
2199         {
2200             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2201             checkGLcall("glGetTexImage");
2202         }
2203         LEAVE_GL();
2204
2205         if (surface->flags & SFLAG_NONPOW2)
2206         {
2207             const BYTE *src_data;
2208             BYTE *dst_data;
2209             UINT y;
2210             /*
2211              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2212              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2213              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2214              *
2215              * We're doing this...
2216              *
2217              * instead of boxing the texture :
2218              * |<-texture width ->|  -->pow2width|   /\
2219              * |111111111111111111|              |   |
2220              * |222 Texture 222222| boxed empty  | texture height
2221              * |3333 Data 33333333|              |   |
2222              * |444444444444444444|              |   \/
2223              * -----------------------------------   |
2224              * |     boxed  empty | boxed empty  | pow2height
2225              * |                  |              |   \/
2226              * -----------------------------------
2227              *
2228              *
2229              * we're repacking the data to the expected texture width
2230              *
2231              * |<-texture width ->|  -->pow2width|   /\
2232              * |111111111111111111222222222222222|   |
2233              * |222333333333333333333444444444444| texture height
2234              * |444444                           |   |
2235              * |                                 |   \/
2236              * |                                 |   |
2237              * |            empty                | pow2height
2238              * |                                 |   \/
2239              * -----------------------------------
2240              *
2241              * == is the same as
2242              *
2243              * |<-texture width ->|    /\
2244              * |111111111111111111|
2245              * |222222222222222222|texture height
2246              * |333333333333333333|
2247              * |444444444444444444|    \/
2248              * --------------------
2249              *
2250              * this also means that any references to allocatedMemory should work with the data as if were a
2251              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2252              *
2253              * internally the texture is still stored in a boxed format so any references to textureName will
2254              * get a boxed texture with width pow2width and not a texture of width resource.width.
2255              *
2256              * Performance should not be an issue, because applications normally do not lock the surfaces when
2257              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2258              * and doesn't have to be re-read. */
2259             src_data = mem;
2260             dst_data = surface->resource.allocatedMemory;
2261             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2262             for (y = 1; y < surface->resource.height; ++y)
2263             {
2264                 /* skip the first row */
2265                 src_data += src_pitch;
2266                 dst_data += dst_pitch;
2267                 memcpy(dst_data, src_data, dst_pitch);
2268             }
2269
2270             HeapFree(GetProcessHeap(), 0, mem);
2271         }
2272     }
2273
2274     /* Surface has now been downloaded */
2275     surface->flags |= SFLAG_INSYSMEM;
2276 }
2277
2278 /* This call just uploads data, the caller is responsible for binding the
2279  * correct texture. */
2280 /* Context activation is done by the caller. */
2281 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2282         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2283         BOOL srgb, const struct wined3d_bo_address *data)
2284 {
2285     UINT update_w = src_rect->right - src_rect->left;
2286     UINT update_h = src_rect->bottom - src_rect->top;
2287
2288     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2289             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2290             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2291
2292     if (surface->resource.map_count)
2293     {
2294         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2295         surface->flags |= SFLAG_PIN_SYSMEM;
2296     }
2297
2298     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2299     {
2300         update_h *= format->height_scale.numerator;
2301         update_h /= format->height_scale.denominator;
2302     }
2303
2304     ENTER_GL();
2305
2306     if (data->buffer_object)
2307     {
2308         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2309         checkGLcall("glBindBufferARB");
2310     }
2311
2312     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2313     {
2314         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2315         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2316         const BYTE *addr = data->addr;
2317         GLenum internal;
2318
2319         addr += (src_rect->top / format->block_height) * src_pitch;
2320         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2321
2322         if (srgb)
2323             internal = format->glGammaInternal;
2324         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2325             internal = format->rtInternal;
2326         else
2327             internal = format->glInternal;
2328
2329         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2330                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2331                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2332
2333         if (row_length == src_pitch)
2334         {
2335             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2336                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2337         }
2338         else
2339         {
2340             UINT row, y;
2341
2342             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2343              * can't use the unpack row length like below. */
2344             for (row = 0, y = dst_point->y; row < row_count; ++row)
2345             {
2346                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2347                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2348                 y += format->block_height;
2349                 addr += src_pitch;
2350             }
2351         }
2352         checkGLcall("glCompressedTexSubImage2DARB");
2353     }
2354     else
2355     {
2356         const BYTE *addr = data->addr;
2357
2358         addr += src_rect->top * src_pitch;
2359         addr += src_rect->left * format->byte_count;
2360
2361         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2362                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2363                 update_w, update_h, format->glFormat, format->glType, addr);
2364
2365         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2366         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2367                 update_w, update_h, format->glFormat, format->glType, addr);
2368         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2369         checkGLcall("glTexSubImage2D");
2370     }
2371
2372     if (data->buffer_object)
2373     {
2374         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2375         checkGLcall("glBindBufferARB");
2376     }
2377
2378     LEAVE_GL();
2379
2380     if (wined3d_settings.strict_draw_ordering)
2381         wglFlush();
2382
2383     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2384     {
2385         struct wined3d_device *device = surface->resource.device;
2386         unsigned int i;
2387
2388         for (i = 0; i < device->context_count; ++i)
2389         {
2390             context_surface_update(device->contexts[i], surface);
2391         }
2392     }
2393 }
2394
2395 static HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck, BOOL use_texturing,
2396         struct wined3d_format *format, enum wined3d_conversion_type *conversion_type)
2397 {
2398     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2399     const struct wined3d_device *device = surface->resource.device;
2400     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
2401     BOOL blit_supported = FALSE;
2402
2403     /* Copy the default values from the surface. Below we might perform fixups */
2404     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
2405     *format = *surface->resource.format;
2406     *conversion_type = WINED3D_CT_NONE;
2407
2408     /* Ok, now look if we have to do any conversion */
2409     switch (surface->resource.format->id)
2410     {
2411         case WINED3DFMT_P8_UINT:
2412             /* Below the call to blit_supported is disabled for Wine 1.2
2413              * because the function isn't operating correctly yet. At the
2414              * moment 8-bit blits are handled in software and if certain GL
2415              * extensions are around, surface conversion is performed at
2416              * upload time. The blit_supported call recognizes it as a
2417              * destination fixup. This type of upload 'fixup' and 8-bit to
2418              * 8-bit blits need to be handled by the blit_shader.
2419              * TODO: get rid of this #if 0. */
2420 #if 0
2421             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
2422                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
2423                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
2424 #endif
2425             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
2426
2427             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
2428              * texturing. Further also use conversion in case of color keying.
2429              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
2430              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
2431              * conflicts with this.
2432              */
2433             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
2434                     || colorkey_active || !use_texturing)
2435             {
2436                 format->glFormat = GL_RGBA;
2437                 format->glInternal = GL_RGBA;
2438                 format->glType = GL_UNSIGNED_BYTE;
2439                 format->conv_byte_count = 4;
2440                 if (colorkey_active)
2441                     *conversion_type = WINED3D_CT_PALETTED_CK;
2442                 else
2443                     *conversion_type = WINED3D_CT_PALETTED;
2444             }
2445             break;
2446
2447         case WINED3DFMT_B2G3R3_UNORM:
2448             /* **********************
2449                 GL_UNSIGNED_BYTE_3_3_2
2450                 ********************** */
2451             if (colorkey_active) {
2452                 /* This texture format will never be used.. So do not care about color keying
2453                     up until the point in time it will be needed :-) */
2454                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
2455             }
2456             break;
2457
2458         case WINED3DFMT_B5G6R5_UNORM:
2459             if (colorkey_active)
2460             {
2461                 *conversion_type = WINED3D_CT_CK_565;
2462                 format->glFormat = GL_RGBA;
2463                 format->glInternal = GL_RGB5_A1;
2464                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
2465                 format->conv_byte_count = 2;
2466             }
2467             break;
2468
2469         case WINED3DFMT_B5G5R5X1_UNORM:
2470             if (colorkey_active)
2471             {
2472                 *conversion_type = WINED3D_CT_CK_5551;
2473                 format->glFormat = GL_BGRA;
2474                 format->glInternal = GL_RGB5_A1;
2475                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
2476                 format->conv_byte_count = 2;
2477             }
2478             break;
2479
2480         case WINED3DFMT_B8G8R8_UNORM:
2481             if (colorkey_active)
2482             {
2483                 *conversion_type = WINED3D_CT_CK_RGB24;
2484                 format->glFormat = GL_RGBA;
2485                 format->glInternal = GL_RGBA8;
2486                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2487                 format->conv_byte_count = 4;
2488             }
2489             break;
2490
2491         case WINED3DFMT_B8G8R8X8_UNORM:
2492             if (colorkey_active)
2493             {
2494                 *conversion_type = WINED3D_CT_RGB32_888;
2495                 format->glFormat = GL_RGBA;
2496                 format->glInternal = GL_RGBA8;
2497                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2498                 format->conv_byte_count = 4;
2499             }
2500             break;
2501
2502         case WINED3DFMT_B8G8R8A8_UNORM:
2503             if (colorkey_active)
2504             {
2505                 *conversion_type = WINED3D_CT_CK_ARGB32;
2506                 format->conv_byte_count = 4;
2507             }
2508             break;
2509
2510         default:
2511             break;
2512     }
2513
2514     if (*conversion_type != WINED3D_CT_NONE)
2515     {
2516         format->rtInternal = format->glInternal;
2517         format->glGammaInternal = format->glInternal;
2518     }
2519
2520     return WINED3D_OK;
2521 }
2522
2523 static BOOL surface_check_block_align(struct wined3d_surface *surface, const RECT *rect)
2524 {
2525     UINT width_mask, height_mask;
2526
2527     if (!rect->left && !rect->top
2528             && rect->right == surface->resource.width
2529             && rect->bottom == surface->resource.height)
2530         return TRUE;
2531
2532     /* This assumes power of two block sizes, but NPOT block sizes would be
2533      * silly anyway. */
2534     width_mask = surface->resource.format->block_width - 1;
2535     height_mask = surface->resource.format->block_height - 1;
2536
2537     if (!(rect->left & width_mask) && !(rect->top & height_mask)
2538             && !(rect->right & width_mask) && !(rect->bottom & height_mask))
2539         return TRUE;
2540
2541     return FALSE;
2542 }
2543
2544 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2545         struct wined3d_surface *src_surface, const RECT *src_rect)
2546 {
2547     const struct wined3d_format *src_format;
2548     const struct wined3d_format *dst_format;
2549     const struct wined3d_gl_info *gl_info;
2550     enum wined3d_conversion_type convert;
2551     struct wined3d_context *context;
2552     struct wined3d_bo_address data;
2553     struct wined3d_format format;
2554     UINT update_w, update_h;
2555     UINT dst_w, dst_h;
2556     UINT src_w, src_h;
2557     RECT r, dst_rect;
2558     UINT src_pitch;
2559     POINT p;
2560
2561     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2562             dst_surface, wine_dbgstr_point(dst_point),
2563             src_surface, wine_dbgstr_rect(src_rect));
2564
2565     src_format = src_surface->resource.format;
2566     dst_format = dst_surface->resource.format;
2567
2568     if (src_format->id != dst_format->id)
2569     {
2570         WARN("Source and destination surfaces should have the same format.\n");
2571         return WINED3DERR_INVALIDCALL;
2572     }
2573
2574     if (!dst_point)
2575     {
2576         p.x = 0;
2577         p.y = 0;
2578         dst_point = &p;
2579     }
2580     else if (dst_point->x < 0 || dst_point->y < 0)
2581     {
2582         WARN("Invalid destination point.\n");
2583         return WINED3DERR_INVALIDCALL;
2584     }
2585
2586     if (!src_rect)
2587     {
2588         r.left = 0;
2589         r.top = 0;
2590         r.right = src_surface->resource.width;
2591         r.bottom = src_surface->resource.height;
2592         src_rect = &r;
2593     }
2594     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2595             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2596     {
2597         WARN("Invalid source rectangle.\n");
2598         return WINED3DERR_INVALIDCALL;
2599     }
2600
2601     src_w = src_surface->resource.width;
2602     src_h = src_surface->resource.height;
2603
2604     dst_w = dst_surface->resource.width;
2605     dst_h = dst_surface->resource.height;
2606
2607     update_w = src_rect->right - src_rect->left;
2608     update_h = src_rect->bottom - src_rect->top;
2609
2610     if (update_w > dst_w || dst_point->x > dst_w - update_w
2611             || update_h > dst_h || dst_point->y > dst_h - update_h)
2612     {
2613         WARN("Destination out of bounds.\n");
2614         return WINED3DERR_INVALIDCALL;
2615     }
2616
2617     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS) && !surface_check_block_align(src_surface, src_rect))
2618     {
2619         WARN("Source rectangle not block-aligned.\n");
2620         return WINED3DERR_INVALIDCALL;
2621     }
2622
2623     SetRect(&dst_rect, dst_point->x, dst_point->y, dst_point->x + update_w, dst_point->y + update_h);
2624     if ((dst_format->flags & WINED3DFMT_FLAG_BLOCKS) && !surface_check_block_align(dst_surface, &dst_rect))
2625     {
2626         WARN("Destination rectangle not block-aligned.\n");
2627         return WINED3DERR_INVALIDCALL;
2628     }
2629
2630     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2631     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2632     if (convert != WINED3D_CT_NONE || format.convert)
2633         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2634
2635     context = context_acquire(dst_surface->resource.device, NULL);
2636     gl_info = context->gl_info;
2637
2638     /* Only load the surface for partial updates. For newly allocated texture
2639      * the texture wouldn't be the current location, and we'd upload zeroes
2640      * just to overwrite them again. */
2641     if (update_w == dst_w && update_h == dst_h)
2642         surface_prepare_texture(dst_surface, context, FALSE);
2643     else
2644         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2645     surface_bind(dst_surface, context, FALSE);
2646
2647     data.buffer_object = src_surface->pbo;
2648     data.addr = src_surface->resource.allocatedMemory;
2649     src_pitch = wined3d_surface_get_pitch(src_surface);
2650
2651     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2652
2653     invalidate_active_texture(dst_surface->resource.device, context);
2654
2655     context_release(context);
2656
2657     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2658     return WINED3D_OK;
2659 }
2660
2661 /* This call just allocates the texture, the caller is responsible for binding
2662  * the correct texture. */
2663 /* Context activation is done by the caller. */
2664 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2665         const struct wined3d_format *format, BOOL srgb)
2666 {
2667     BOOL enable_client_storage = FALSE;
2668     GLsizei width = surface->pow2Width;
2669     GLsizei height = surface->pow2Height;
2670     const BYTE *mem = NULL;
2671     GLenum internal;
2672
2673     if (srgb)
2674     {
2675         internal = format->glGammaInternal;
2676     }
2677     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2678     {
2679         internal = format->rtInternal;
2680     }
2681     else
2682     {
2683         internal = format->glInternal;
2684     }
2685
2686     if (!internal)
2687         FIXME("No GL internal format for format %s.\n", debug_d3dformat(format->id));
2688
2689     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2690     {
2691         height *= format->height_scale.numerator;
2692         height /= format->height_scale.denominator;
2693     }
2694
2695     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2696             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2697             internal, width, height, format->glFormat, format->glType);
2698
2699     ENTER_GL();
2700
2701     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2702     {
2703         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2704                 || !surface->resource.allocatedMemory)
2705         {
2706             /* In some cases we want to disable client storage.
2707              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2708              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2709              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2710              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2711              */
2712             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2713             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2714             surface->flags &= ~SFLAG_CLIENT;
2715             enable_client_storage = TRUE;
2716         }
2717         else
2718         {
2719             surface->flags |= SFLAG_CLIENT;
2720
2721             /* Point OpenGL to our allocated texture memory. Do not use
2722              * resource.allocatedMemory here because it might point into a
2723              * PBO. Instead use heapMemory, but get the alignment right. */
2724             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2725                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2726         }
2727     }
2728
2729     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2730     {
2731         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2732                 internal, width, height, 0, surface->resource.size, mem));
2733         checkGLcall("glCompressedTexImage2DARB");
2734     }
2735     else
2736     {
2737         glTexImage2D(surface->texture_target, surface->texture_level,
2738                 internal, width, height, 0, format->glFormat, format->glType, mem);
2739         checkGLcall("glTexImage2D");
2740     }
2741
2742     if(enable_client_storage) {
2743         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2744         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2745     }
2746     LEAVE_GL();
2747 }
2748
2749 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2750  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2751 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2752 /* GL locking is done by the caller */
2753 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2754 {
2755     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2756     struct wined3d_renderbuffer_entry *entry;
2757     GLuint renderbuffer = 0;
2758     unsigned int src_width, src_height;
2759     unsigned int width, height;
2760
2761     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2762     {
2763         width = rt->pow2Width;
2764         height = rt->pow2Height;
2765     }
2766     else
2767     {
2768         width = surface->pow2Width;
2769         height = surface->pow2Height;
2770     }
2771
2772     src_width = surface->pow2Width;
2773     src_height = surface->pow2Height;
2774
2775     /* A depth stencil smaller than the render target is not valid */
2776     if (width > src_width || height > src_height) return;
2777
2778     /* Remove any renderbuffer set if the sizes match */
2779     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2780             || (width == src_width && height == src_height))
2781     {
2782         surface->current_renderbuffer = NULL;
2783         return;
2784     }
2785
2786     /* Look if we've already got a renderbuffer of the correct dimensions */
2787     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2788     {
2789         if (entry->width == width && entry->height == height)
2790         {
2791             renderbuffer = entry->id;
2792             surface->current_renderbuffer = entry;
2793             break;
2794         }
2795     }
2796
2797     if (!renderbuffer)
2798     {
2799         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2800         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2801         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2802                 surface->resource.format->glInternal, width, height);
2803
2804         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2805         entry->width = width;
2806         entry->height = height;
2807         entry->id = renderbuffer;
2808         list_add_head(&surface->renderbuffers, &entry->entry);
2809
2810         surface->current_renderbuffer = entry;
2811     }
2812
2813     checkGLcall("set_compatible_renderbuffer");
2814 }
2815
2816 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2817 {
2818     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2819
2820     TRACE("surface %p.\n", surface);
2821
2822     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2823     {
2824         ERR("Surface %p is not on a swapchain.\n", surface);
2825         return GL_NONE;
2826     }
2827
2828     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2829     {
2830         if (swapchain->render_to_fbo)
2831         {
2832             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2833             return GL_COLOR_ATTACHMENT0;
2834         }
2835         TRACE("Returning GL_BACK\n");
2836         return GL_BACK;
2837     }
2838     else if (surface == swapchain->front_buffer)
2839     {
2840         TRACE("Returning GL_FRONT\n");
2841         return GL_FRONT;
2842     }
2843
2844     FIXME("Higher back buffer, returning GL_BACK\n");
2845     return GL_BACK;
2846 }
2847
2848 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2849 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2850 {
2851     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2852
2853     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2854         /* No partial locking for textures yet. */
2855         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2856
2857     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2858     if (dirty_rect)
2859     {
2860         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2861         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2862         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2863         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2864     }
2865     else
2866     {
2867         surface->dirtyRect.left = 0;
2868         surface->dirtyRect.top = 0;
2869         surface->dirtyRect.right = surface->resource.width;
2870         surface->dirtyRect.bottom = surface->resource.height;
2871     }
2872
2873     /* if the container is a texture then mark it dirty. */
2874     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2875     {
2876         TRACE("Passing to container.\n");
2877         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2878     }
2879 }
2880
2881 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2882 {
2883     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2884     BOOL ck_changed;
2885
2886     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2887
2888     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2889     {
2890         ERR("Not supported on scratch surfaces.\n");
2891         return WINED3DERR_INVALIDCALL;
2892     }
2893
2894     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2895
2896     /* Reload if either the texture and sysmem have different ideas about the
2897      * color key, or the actual key values changed. */
2898     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2899             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2900             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2901     {
2902         TRACE("Reloading because of color keying\n");
2903         /* To perform the color key conversion we need a sysmem copy of
2904          * the surface. Make sure we have it. */
2905
2906         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2907         /* Make sure the texture is reloaded because of the color key change,
2908          * this kills performance though :( */
2909         /* TODO: This is not necessarily needed with hw palettized texture support. */
2910         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2911         /* Switching color keying on / off may change the internal format. */
2912         if (ck_changed)
2913             surface_force_reload(surface);
2914     }
2915     else if (!(surface->flags & flag))
2916     {
2917         TRACE("Reloading because surface is dirty.\n");
2918     }
2919     else
2920     {
2921         TRACE("surface is already in texture\n");
2922         return WINED3D_OK;
2923     }
2924
2925     /* No partial locking for textures yet. */
2926     surface_load_location(surface, flag, NULL);
2927     surface_evict_sysmem(surface);
2928
2929     return WINED3D_OK;
2930 }
2931
/* Convert a 32-bit float to the bit pattern of a 16-bit (half precision)
 * float: 1 sign bit, 5 exponent bits with a bias of 15, and 10 mantissa bits.
 *
 * Zero (of either sign) is returned as 0x0000, NaN as 0x7c01, and values too
 * large for a half float as infinity. Values too small for a normalized half
 * float are returned as denormals, with the excess mantissa bits cut off.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The mantissa, including its implicit leading bit, is scaled to the
     * range [2^10, 2^11). */
    static const float mantissa_min = 1024.0f;
    static const float mantissa_max = 2048.0f;
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    if (tmp < mantissa_min)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < mantissa_min);
    }
    else if (tmp >= mantissa_max)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= mantissa_max);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 mantissa bits. Renormalize in that
     * case; otherwise the carry would be masked away below, and the result
     * would be off by a factor of two. */
    if (mantissa == 0x800)
    {
        mantissa >>= 1;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2994
2995 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2996 {
2997     ULONG refcount;
2998
2999     TRACE("Surface %p, container %p of type %#x.\n",
3000             surface, surface->container.u.base, surface->container.type);
3001
3002     switch (surface->container.type)
3003     {
3004         case WINED3D_CONTAINER_TEXTURE:
3005             return wined3d_texture_incref(surface->container.u.texture);
3006
3007         case WINED3D_CONTAINER_SWAPCHAIN:
3008             return wined3d_swapchain_incref(surface->container.u.swapchain);
3009
3010         default:
3011             ERR("Unhandled container type %#x.\n", surface->container.type);
3012         case WINED3D_CONTAINER_NONE:
3013             break;
3014     }
3015
3016     refcount = InterlockedIncrement(&surface->resource.ref);
3017     TRACE("%p increasing refcount to %u.\n", surface, refcount);
3018
3019     return refcount;
3020 }
3021
3022 /* Do not call while under the GL lock. */
3023 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
3024 {
3025     ULONG refcount;
3026
3027     TRACE("Surface %p, container %p of type %#x.\n",
3028             surface, surface->container.u.base, surface->container.type);
3029
3030     switch (surface->container.type)
3031     {
3032         case WINED3D_CONTAINER_TEXTURE:
3033             return wined3d_texture_decref(surface->container.u.texture);
3034
3035         case WINED3D_CONTAINER_SWAPCHAIN:
3036             return wined3d_swapchain_decref(surface->container.u.swapchain);
3037
3038         default:
3039             ERR("Unhandled container type %#x.\n", surface->container.type);
3040         case WINED3D_CONTAINER_NONE:
3041             break;
3042     }
3043
3044     refcount = InterlockedDecrement(&surface->resource.ref);
3045     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
3046
3047     if (!refcount)
3048     {
3049         surface_cleanup(surface);
3050         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
3051
3052         TRACE("Destroyed surface %p.\n", surface);
3053         HeapFree(GetProcessHeap(), 0, surface);
3054     }
3055
3056     return refcount;
3057 }
3058
3059 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
3060 {
3061     return resource_set_priority(&surface->resource, priority);
3062 }
3063
3064 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
3065 {
3066     return resource_get_priority(&surface->resource);
3067 }
3068
3069 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
3070 {
3071     TRACE("surface %p.\n", surface);
3072
3073     if (!surface->resource.device->d3d_initialized)
3074     {
3075         ERR("D3D not initialized.\n");
3076         return;
3077     }
3078
3079     surface_internal_preload(surface, SRGB_ANY);
3080 }
3081
3082 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
3083 {
3084     TRACE("surface %p.\n", surface);
3085
3086     return surface->resource.parent;
3087 }
3088
3089 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
3090 {
3091     TRACE("surface %p.\n", surface);
3092
3093     return &surface->resource;
3094 }
3095
3096 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
3097 {
3098     TRACE("surface %p, flags %#x.\n", surface, flags);
3099
3100     switch (flags)
3101     {
3102         case WINEDDGBS_CANBLT:
3103         case WINEDDGBS_ISBLTDONE:
3104             return WINED3D_OK;
3105
3106         default:
3107             return WINED3DERR_INVALIDCALL;
3108     }
3109 }
3110
3111 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
3112 {
3113     TRACE("surface %p, flags %#x.\n", surface, flags);
3114
3115     /* XXX: DDERR_INVALIDSURFACETYPE */
3116
3117     switch (flags)
3118     {
3119         case WINEDDGFS_CANFLIP:
3120         case WINEDDGFS_ISFLIPDONE:
3121             return WINED3D_OK;
3122
3123         default:
3124             return WINED3DERR_INVALIDCALL;
3125     }
3126 }
3127
3128 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
3129 {
3130     TRACE("surface %p.\n", surface);
3131
3132     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
3133     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
3134 }
3135
3136 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
3137 {
3138     TRACE("surface %p.\n", surface);
3139
3140     surface->flags &= ~SFLAG_LOST;
3141     return WINED3D_OK;
3142 }
3143
3144 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
3145 {
3146     TRACE("surface %p, palette %p.\n", surface, palette);
3147
3148     if (surface->palette == palette)
3149     {
3150         TRACE("Nop palette change.\n");
3151         return WINED3D_OK;
3152     }
3153
3154     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3155         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3156
3157     surface->palette = palette;
3158
3159     if (palette)
3160     {
3161         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3162             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3163
3164         surface->surface_ops->surface_realize_palette(surface);
3165     }
3166
3167     return WINED3D_OK;
3168 }
3169
3170 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3171         DWORD flags, const struct wined3d_color_key *color_key)
3172 {
3173     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3174
3175     if (flags & WINEDDCKEY_COLORSPACE)
3176     {
3177         FIXME(" colorkey value not supported (%08x) !\n", flags);
3178         return WINED3DERR_INVALIDCALL;
3179     }
3180
3181     /* Dirtify the surface, but only if a key was changed. */
3182     if (color_key)
3183     {
3184         switch (flags & ~WINEDDCKEY_COLORSPACE)
3185         {
3186             case WINEDDCKEY_DESTBLT:
3187                 surface->dst_blt_color_key = *color_key;
3188                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3189                 break;
3190
3191             case WINEDDCKEY_DESTOVERLAY:
3192                 surface->dst_overlay_color_key = *color_key;
3193                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3194                 break;
3195
3196             case WINEDDCKEY_SRCOVERLAY:
3197                 surface->src_overlay_color_key = *color_key;
3198                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3199                 break;
3200
3201             case WINEDDCKEY_SRCBLT:
3202                 surface->src_blt_color_key = *color_key;
3203                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3204                 break;
3205         }
3206     }
3207     else
3208     {
3209         switch (flags & ~WINEDDCKEY_COLORSPACE)
3210         {
3211             case WINEDDCKEY_DESTBLT:
3212                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3213                 break;
3214
3215             case WINEDDCKEY_DESTOVERLAY:
3216                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3217                 break;
3218
3219             case WINEDDCKEY_SRCOVERLAY:
3220                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3221                 break;
3222
3223             case WINEDDCKEY_SRCBLT:
3224                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3225                 break;
3226         }
3227     }
3228
3229     return WINED3D_OK;
3230 }
3231
3232 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3233 {
3234     TRACE("surface %p.\n", surface);
3235
3236     return surface->palette;
3237 }
3238
3239 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3240 {
3241     const struct wined3d_format *format = surface->resource.format;
3242     DWORD pitch;
3243
3244     TRACE("surface %p.\n", surface);
3245
3246     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3247     {
3248         /* Since compressed formats are block based, pitch means the amount of
3249          * bytes to the next row of block rather than the next row of pixels. */
3250         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3251         pitch = row_block_count * format->block_byte_count;
3252     }
3253     else
3254     {
3255         unsigned char alignment = surface->resource.device->surface_alignment;
3256         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3257         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3258     }
3259
3260     TRACE("Returning %u.\n", pitch);
3261
3262     return pitch;
3263 }
3264
/* Replace the system memory backing a surface with an application-provided
 * pointer, or release a previously set user pointer when mem is NULL.
 *
 * Returns WINED3DERR_INVALIDCALL while the surface is mapped, while its DC is
 * in use, or when the surface is a render target; WINED3D_OK otherwise. */
HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
{
    TRACE("surface %p, mem %p.\n", surface, mem);

    if (surface->resource.map_count || (surface->flags & SFLAG_DCINUSE))
    {
        WARN("Surface is mapped or the DC is in use.\n");
        return WINED3DERR_INVALIDCALL;
    }

    /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
    if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
    {
        ERR("Not supported on render targets.\n");
        return WINED3DERR_INVALIDCALL;
    }

    /* A new, different pointer was supplied: switch the surface over to it. */
    if (mem && mem != surface->resource.allocatedMemory)
    {
        /* Heap block to free once the surface no longer references it. */
        void *release = NULL;

        /* Do I have to copy the old surface content? */
        if (surface->flags & SFLAG_DIBSECTION)
        {
            /* The old memory belonged to a DIB section; tear the DC and the
             * DIB section down. */
            DeleteDC(surface->hDC);
            DeleteObject(surface->dib.DIBsection);
            surface->dib.bitmap_data = NULL;
            surface->resource.allocatedMemory = NULL;
            surface->hDC = NULL;
            surface->flags &= ~SFLAG_DIBSECTION;
        }
        else if (!(surface->flags & SFLAG_USERPTR))
        {
            /* The old memory was our own heap allocation; remember it so it
             * can be freed below. */
            release = surface->resource.heapMemory;
            surface->resource.heapMemory = NULL;
        }
        surface->resource.allocatedMemory = mem;
        surface->flags |= SFLAG_USERPTR;

        /* Now the surface memory is most up to date. Invalidate drawable and texture. */
        surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);

        /* For client textures OpenGL has to be notified. */
        if (surface->flags & SFLAG_CLIENT)
            surface_release_client_storage(surface);

        /* Now free the old memory if any. */
        HeapFree(GetProcessHeap(), 0, release);
    }
    /* Either mem is NULL or it equals the current pointer, and the surface
     * currently uses a user pointer. */
    else if (surface->flags & SFLAG_USERPTR)
    {
        /* HeapMemory should be NULL already. */
        if (surface->resource.heapMemory)
            ERR("User pointer surface has heap memory allocated.\n");

        if (!mem)
        {
            /* Drop the user pointer and prepare system memory again via
             * surface_prepare_system_memory(). */
            surface->resource.allocatedMemory = NULL;
            surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);

            if (surface->flags & SFLAG_CLIENT)
                surface_release_client_storage(surface);

            surface_prepare_system_memory(surface);
        }

        surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
    }

    return WINED3D_OK;
}
3336
3337 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3338 {
3339     LONG w, h;
3340
3341     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3342
3343     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3344     {
3345         WARN("Not an overlay surface.\n");
3346         return WINEDDERR_NOTAOVERLAYSURFACE;
3347     }
3348
3349     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3350     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3351     surface->overlay_destrect.left = x;
3352     surface->overlay_destrect.top = y;
3353     surface->overlay_destrect.right = x + w;
3354     surface->overlay_destrect.bottom = y + h;
3355
3356     surface_draw_overlay(surface);
3357
3358     return WINED3D_OK;
3359 }
3360
3361 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3362 {
3363     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3364
3365     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3366     {
3367         TRACE("Not an overlay surface.\n");
3368         return WINEDDERR_NOTAOVERLAYSURFACE;
3369     }
3370
3371     if (!surface->overlay_dest)
3372     {
3373         TRACE("Overlay not visible.\n");
3374         *x = 0;
3375         *y = 0;
3376         return WINEDDERR_OVERLAYNOTVISIBLE;
3377     }
3378
3379     *x = surface->overlay_destrect.left;
3380     *y = surface->overlay_destrect.top;
3381
3382     TRACE("Returning position %d, %d.\n", *x, *y);
3383
3384     return WINED3D_OK;
3385 }
3386
3387 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3388         DWORD flags, struct wined3d_surface *ref)
3389 {
3390     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3391
3392     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3393     {
3394         TRACE("Not an overlay surface.\n");
3395         return WINEDDERR_NOTAOVERLAYSURFACE;
3396     }
3397
3398     return WINED3D_OK;
3399 }
3400
3401 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3402         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3403 {
3404     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3405             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3406
3407     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3408     {
3409         WARN("Not an overlay surface.\n");
3410         return WINEDDERR_NOTAOVERLAYSURFACE;
3411     }
3412     else if (!dst_surface)
3413     {
3414         WARN("Dest surface is NULL.\n");
3415         return WINED3DERR_INVALIDCALL;
3416     }
3417
3418     if (src_rect)
3419     {
3420         surface->overlay_srcrect = *src_rect;
3421     }
3422     else
3423     {
3424         surface->overlay_srcrect.left = 0;
3425         surface->overlay_srcrect.top = 0;
3426         surface->overlay_srcrect.right = surface->resource.width;
3427         surface->overlay_srcrect.bottom = surface->resource.height;
3428     }
3429
3430     if (dst_rect)
3431     {
3432         surface->overlay_destrect = *dst_rect;
3433     }
3434     else
3435     {
3436         surface->overlay_destrect.left = 0;
3437         surface->overlay_destrect.top = 0;
3438         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3439         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3440     }
3441
3442     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3443     {
3444         surface->overlay_dest = NULL;
3445         list_remove(&surface->overlay_entry);
3446     }
3447
3448     if (flags & WINEDDOVER_SHOW)
3449     {
3450         if (surface->overlay_dest != dst_surface)
3451         {
3452             surface->overlay_dest = dst_surface;
3453             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3454         }
3455     }
3456     else if (flags & WINEDDOVER_HIDE)
3457     {
3458         /* tests show that the rectangles are erased on hide */
3459         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3460         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3461         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3462         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3463         surface->overlay_dest = NULL;
3464     }
3465
3466     surface_draw_overlay(surface);
3467
3468     return WINED3D_OK;
3469 }
3470
3471 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3472         UINT width, UINT height, enum wined3d_format_id format_id,
3473         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3474 {
3475     struct wined3d_device *device = surface->resource.device;
3476     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3477     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3478     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3479
3480     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3481             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3482
3483     if (!resource_size)
3484         return WINED3DERR_INVALIDCALL;
3485
3486     if (device->d3d_initialized)
3487         surface->resource.resource_ops->resource_unload(&surface->resource);
3488
3489     if (surface->flags & SFLAG_DIBSECTION)
3490     {
3491         DeleteDC(surface->hDC);
3492         DeleteObject(surface->dib.DIBsection);
3493         surface->dib.bitmap_data = NULL;
3494         surface->flags &= ~SFLAG_DIBSECTION;
3495     }
3496
3497     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3498     surface->resource.allocatedMemory = NULL;
3499     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3500     surface->resource.heapMemory = NULL;
3501
3502     surface->resource.width = width;
3503     surface->resource.height = height;
3504     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3505             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3506     {
3507         surface->pow2Width = width;
3508         surface->pow2Height = height;
3509     }
3510     else
3511     {
3512         surface->pow2Width = surface->pow2Height = 1;
3513         while (surface->pow2Width < width)
3514             surface->pow2Width <<= 1;
3515         while (surface->pow2Height < height)
3516             surface->pow2Height <<= 1;
3517     }
3518
3519     if (surface->pow2Width != width || surface->pow2Height != height)
3520         surface->flags |= SFLAG_NONPOW2;
3521     else
3522         surface->flags &= ~SFLAG_NONPOW2;
3523
3524     surface->resource.format = format;
3525     surface->resource.multisample_type = multisample_type;
3526     surface->resource.multisample_quality = multisample_quality;
3527     surface->resource.size = resource_size;
3528
3529     if (!surface_init_sysmem(surface))
3530         return E_OUTOFMEMORY;
3531
3532     return WINED3D_OK;
3533 }
3534
3535 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3536         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3537 {
3538     unsigned short *dst_s;
3539     const float *src_f;
3540     unsigned int x, y;
3541
3542     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3543
3544     for (y = 0; y < h; ++y)
3545     {
3546         src_f = (const float *)(src + y * pitch_in);
3547         dst_s = (unsigned short *) (dst + y * pitch_out);
3548         for (x = 0; x < w; ++x)
3549         {
3550             dst_s[x] = float_32_to_16(src_f + x);
3551         }
3552     }
3553 }
3554
3555 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3556         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3557 {
3558     static const unsigned char convert_5to8[] =
3559     {
3560         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3561         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3562         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3563         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3564     };
3565     static const unsigned char convert_6to8[] =
3566     {
3567         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3568         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3569         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3570         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3571         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3572         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3573         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3574         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3575     };
3576     unsigned int x, y;
3577
3578     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3579
3580     for (y = 0; y < h; ++y)
3581     {
3582         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3583         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3584         for (x = 0; x < w; ++x)
3585         {
3586             WORD pixel = src_line[x];
3587             dst_line[x] = 0xff000000
3588                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3589                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3590                     | convert_5to8[(pixel & 0x001f)];
3591         }
3592     }
3593 }
3594
3595 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3596  * in both cases we're just setting the X / Alpha channel to 0xff. */
3597 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3598         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3599 {
3600     unsigned int x, y;
3601
3602     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3603
3604     for (y = 0; y < h; ++y)
3605     {
3606         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3607         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3608
3609         for (x = 0; x < w; ++x)
3610         {
3611             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3612         }
3613     }
3614 }
3615
3616 static inline BYTE cliptobyte(int x)
3617 {
3618     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3619 }
3620
3621 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3622         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3623 {
3624     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3625     unsigned int x, y;
3626
3627     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3628
3629     for (y = 0; y < h; ++y)
3630     {
3631         const BYTE *src_line = src + y * pitch_in;
3632         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3633         for (x = 0; x < w; ++x)
3634         {
3635             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3636              *     C = Y - 16; D = U - 128; E = V - 128;
3637              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3638              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3639              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3640              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3641              * U and V are shared between the pixels. */
3642             if (!(x & 1)) /* For every even pixel, read new U and V. */
3643             {
3644                 d = (int) src_line[1] - 128;
3645                 e = (int) src_line[3] - 128;
3646                 r2 = 409 * e + 128;
3647                 g2 = - 100 * d - 208 * e + 128;
3648                 b2 = 516 * d + 128;
3649             }
3650             c2 = 298 * ((int) src_line[0] - 16);
3651             dst_line[x] = 0xff000000
3652                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3653                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3654                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3655                 /* Scale RGB values to 0..255 range,
3656                  * then clip them if still not in range (may be negative),
3657                  * then shift them within DWORD if necessary. */
3658             src_line += 2;
3659         }
3660     }
3661 }
3662
3663 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3664         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3665 {
3666     unsigned int x, y;
3667     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3668
3669     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3670
3671     for (y = 0; y < h; ++y)
3672     {
3673         const BYTE *src_line = src + y * pitch_in;
3674         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3675         for (x = 0; x < w; ++x)
3676         {
3677             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3678              *     C = Y - 16; D = U - 128; E = V - 128;
3679              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3680              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3681              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3682              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3683              * U and V are shared between the pixels. */
3684             if (!(x & 1)) /* For every even pixel, read new U and V. */
3685             {
3686                 d = (int) src_line[1] - 128;
3687                 e = (int) src_line[3] - 128;
3688                 r2 = 409 * e + 128;
3689                 g2 = - 100 * d - 208 * e + 128;
3690                 b2 = 516 * d + 128;
3691             }
3692             c2 = 298 * ((int) src_line[0] - 16);
3693             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3694                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3695                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3696                 /* Scale RGB values to 0..255 range,
3697                  * then clip them if still not in range (may be negative),
3698                  * then shift them within DWORD if necessary. */
3699             src_line += 2;
3700         }
3701     }
3702 }
3703
3704 struct d3dfmt_convertor_desc
3705 {
3706     enum wined3d_format_id from, to;
3707     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3708 };
3709
/* Table of the supported conversions, as (source format, destination format,
 * conversion routine) entries. find_convertor() searches it linearly. */
static const struct d3dfmt_convertor_desc convertors[] =
{
    {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
    {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
    {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
};
3719
3720 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3721         enum wined3d_format_id to)
3722 {
3723     unsigned int i;
3724
3725     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3726     {
3727         if (convertors[i].from == from && convertors[i].to == to)
3728             return &convertors[i];
3729     }
3730
3731     return NULL;
3732 }
3733
3734 /*****************************************************************************
3735  * surface_convert_format
3736  *
3737  * Creates a duplicate of a surface in a different format. Is used by Blt to
3738  * blit between surfaces with different formats.
3739  *
3740  * Parameters
3741  *  source: Source surface
3742  *  fmt: Requested destination format
3743  *
3744  *****************************************************************************/
3745 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3746 {
3747     struct wined3d_map_desc src_map, dst_map;
3748     const struct d3dfmt_convertor_desc *conv;
3749     struct wined3d_surface *ret = NULL;
3750     HRESULT hr;
3751
3752     conv = find_convertor(source->resource.format->id, to_fmt);
3753     if (!conv)
3754     {
3755         FIXME("Cannot find a conversion function from format %s to %s.\n",
3756                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3757         return NULL;
3758     }
3759
3760     wined3d_surface_create(source->resource.device, source->resource.width,
3761             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3D_POOL_SCRATCH,
3762             WINED3D_MULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3763             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3764             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3765     if (!ret)
3766     {
3767         ERR("Failed to create a destination surface for conversion.\n");
3768         return NULL;
3769     }
3770
3771     memset(&src_map, 0, sizeof(src_map));
3772     memset(&dst_map, 0, sizeof(dst_map));
3773
3774     if (FAILED(hr = wined3d_surface_map(source, &src_map, NULL, WINED3D_MAP_READONLY)))
3775     {
3776         ERR("Failed to lock the source surface.\n");
3777         wined3d_surface_decref(ret);
3778         return NULL;
3779     }
3780     if (FAILED(hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3D_MAP_READONLY)))
3781     {
3782         ERR("Failed to lock the destination surface.\n");
3783         wined3d_surface_unmap(source);
3784         wined3d_surface_decref(ret);
3785         return NULL;
3786     }
3787
3788     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3789             source->resource.width, source->resource.height);
3790
3791     wined3d_surface_unmap(ret);
3792     wined3d_surface_unmap(source);
3793
3794     return ret;
3795 }
3796
3797 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3798         unsigned int bpp, UINT pitch, DWORD color)
3799 {
3800     BYTE *first;
3801     int x, y;
3802
3803     /* Do first row */
3804
3805 #define COLORFILL_ROW(type) \
3806 do { \
3807     type *d = (type *)buf; \
3808     for (x = 0; x < width; ++x) \
3809         d[x] = (type)color; \
3810 } while(0)
3811
3812     switch (bpp)
3813     {
3814         case 1:
3815             COLORFILL_ROW(BYTE);
3816             break;
3817
3818         case 2:
3819             COLORFILL_ROW(WORD);
3820             break;
3821
3822         case 3:
3823         {
3824             BYTE *d = buf;
3825             for (x = 0; x < width; ++x, d += 3)
3826             {
3827                 d[0] = (color      ) & 0xFF;
3828                 d[1] = (color >>  8) & 0xFF;
3829                 d[2] = (color >> 16) & 0xFF;
3830             }
3831             break;
3832         }
3833         case 4:
3834             COLORFILL_ROW(DWORD);
3835             break;
3836
3837         default:
3838             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3839             return WINED3DERR_NOTAVAILABLE;
3840     }
3841
3842 #undef COLORFILL_ROW
3843
3844     /* Now copy first row. */
3845     first = buf;
3846     for (y = 1; y < height; ++y)
3847     {
3848         buf += pitch;
3849         memcpy(buf, first, width * bpp);
3850     }
3851
3852     return WINED3D_OK;
3853 }
3854
3855 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3856 {
3857     TRACE("surface %p.\n", surface);
3858
3859     if (!surface->resource.map_count)
3860     {
3861         WARN("Trying to unmap unmapped surface.\n");
3862         return WINEDDERR_NOTLOCKED;
3863     }
3864     --surface->resource.map_count;
3865
3866     surface->surface_ops->surface_unmap(surface);
3867
3868     return WINED3D_OK;
3869 }
3870
/* Map a surface (or a rectangle of it) for CPU access.
 *
 * On success map_desc receives the row pitch, a slice pitch of 0 and a
 * pointer to the memory of the top-left pixel (or block) of the mapped
 * region, and the mapped region is recorded in surface->lockedRect. A NULL
 * rect maps the whole surface.
 *
 * Returns WINED3DERR_INVALIDCALL when the surface is already mapped, or when
 * rect is not block aligned for a block based format on a
 * WINED3D_POOL_DEFAULT surface; WINED3D_OK otherwise. */
HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
        struct wined3d_map_desc *map_desc, const RECT *rect, DWORD flags)
{
    const struct wined3d_format *format = surface->resource.format;

    TRACE("surface %p, map_desc %p, rect %s, flags %#x.\n",
            surface, map_desc, wine_dbgstr_rect(rect), flags);

    if (surface->resource.map_count)
    {
        WARN("Surface is already mapped.\n");
        return WINED3DERR_INVALIDCALL;
    }

    if ((format->flags & WINED3DFMT_FLAG_BLOCKS) && rect
            && !surface_check_block_align(surface, rect))
    {
        WARN("Map rect %s is misaligned for %ux%u blocks.\n",
                wine_dbgstr_rect(rect), format->block_width, format->block_height);

        /* Misaligned rectangles are only rejected for the default pool; for
         * other pools the map proceeds after the warning. */
        if (surface->resource.pool == WINED3D_POOL_DEFAULT)
            return WINED3DERR_INVALIDCALL;
    }

    ++surface->resource.map_count;

    if (!(surface->flags & SFLAG_LOCKABLE))
        WARN("Trying to lock unlockable surface.\n");

    /* Performance optimization: Count how often a surface is mapped, if it is
     * mapped regularly do not throw away the system memory copy. This avoids
     * the need to download the surface from OpenGL all the time. The surface
     * is still downloaded if the OpenGL texture is changed. */
    if (!(surface->flags & SFLAG_DYNLOCK))
    {
        if (++surface->lockCount > MAXLOCKCOUNT)
        {
            TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
            surface->flags |= SFLAG_DYNLOCK;
        }
    }

    surface->surface_ops->surface_map(surface, rect, flags);

    /* Formats flagged with WINED3DFMT_FLAG_BROKEN_PITCH report an unaligned
     * width * byte_count pitch instead of the regular surface pitch. */
    if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
        map_desc->row_pitch = surface->resource.width * format->byte_count;
    else
        map_desc->row_pitch = wined3d_surface_get_pitch(surface);
    map_desc->slice_pitch = 0;

    if (!rect)
    {
        /* Whole surface: the data pointer is the start of the allocation. */
        map_desc->data = surface->resource.allocatedMemory;
        surface->lockedRect.left = 0;
        surface->lockedRect.top = 0;
        surface->lockedRect.right = surface->resource.width;
        surface->lockedRect.bottom = surface->resource.height;
    }
    else
    {
        /* Block addressing is used only when BLOCKS is set and BROKEN_PITCH
         * is not. */
        if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
        {
            /* Compressed textures are block based, so calculate the offset of
             * the block that contains the top-left pixel of the locked rectangle. */
            map_desc->data = surface->resource.allocatedMemory
                    + ((rect->top / format->block_height) * map_desc->row_pitch)
                    + ((rect->left / format->block_width) * format->block_byte_count);
        }
        else
        {
            /* Plain per-pixel addressing. */
            map_desc->data = surface->resource.allocatedMemory
                    + (map_desc->row_pitch * rect->top)
                    + (rect->left * format->byte_count);
        }
        surface->lockedRect.left = rect->left;
        surface->lockedRect.top = rect->top;
        surface->lockedRect.right = rect->right;
        surface->lockedRect.bottom = rect->bottom;
    }

    TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
    TRACE("Returning memory %p, pitch %u.\n", map_desc->data, map_desc->row_pitch);

    return WINED3D_OK;
}
3956
3957 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3958 {
3959     struct wined3d_map_desc map;
3960     HRESULT hr;
3961
3962     TRACE("surface %p, dc %p.\n", surface, dc);
3963
3964     if (surface->flags & SFLAG_USERPTR)
3965     {
3966         ERR("Not supported on surfaces with application-provided memory.\n");
3967         return WINEDDERR_NODC;
3968     }
3969
3970     /* Give more detailed info for ddraw. */
3971     if (surface->flags & SFLAG_DCINUSE)
3972         return WINEDDERR_DCALREADYCREATED;
3973
3974     /* Can't GetDC if the surface is locked. */
3975     if (surface->resource.map_count)
3976         return WINED3DERR_INVALIDCALL;
3977
3978     /* Create a DIB section if there isn't a dc yet. */
3979     if (!surface->hDC)
3980     {
3981         if (surface->flags & SFLAG_CLIENT)
3982         {
3983             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3984             surface_release_client_storage(surface);
3985         }
3986         hr = surface_create_dib_section(surface);
3987         if (FAILED(hr))
3988             return WINED3DERR_INVALIDCALL;
3989
3990         /* Use the DIB section from now on if we are not using a PBO. */
3991         if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
3992         {
3993             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3994             surface->resource.heapMemory = NULL;
3995             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3996         }
3997     }
3998
3999     /* Map the surface. */
4000     hr = wined3d_surface_map(surface, &map, NULL, 0);
4001     if (FAILED(hr))
4002     {
4003         ERR("Map failed, hr %#x.\n", hr);
4004         return hr;
4005     }
4006
4007     /* Sync the DIB with the PBO. This can't be done earlier because Map()
4008      * activates the allocatedMemory. */
4009     if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
4010         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
4011
4012     if (surface->resource.format->id == WINED3DFMT_P8_UINT
4013             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
4014     {
4015         /* GetDC on palettized formats is unsupported in D3D9, and the method
4016          * is missing in D3D8, so this should only be used for DX <=7
4017          * surfaces (with non-device palettes). */
4018         const PALETTEENTRY *pal = NULL;
4019
4020         if (surface->palette)
4021         {
4022             pal = surface->palette->palents;
4023         }
4024         else
4025         {
4026             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
4027             struct wined3d_surface *dds_primary = swapchain->front_buffer;
4028
4029             if (dds_primary && dds_primary->palette)
4030                 pal = dds_primary->palette->palents;
4031         }
4032
4033         if (pal)
4034         {
4035             RGBQUAD col[256];
4036             unsigned int i;
4037
4038             for (i = 0; i < 256; ++i)
4039             {
4040                 col[i].rgbRed = pal[i].peRed;
4041                 col[i].rgbGreen = pal[i].peGreen;
4042                 col[i].rgbBlue = pal[i].peBlue;
4043                 col[i].rgbReserved = 0;
4044             }
4045             SetDIBColorTable(surface->hDC, 0, 256, col);
4046         }
4047     }
4048
4049     surface->flags |= SFLAG_DCINUSE;
4050
4051     *dc = surface->hDC;
4052     TRACE("Returning dc %p.\n", *dc);
4053
4054     return WINED3D_OK;
4055 }
4056
4057 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
4058 {
4059     TRACE("surface %p, dc %p.\n", surface, dc);
4060
4061     if (!(surface->flags & SFLAG_DCINUSE))
4062         return WINEDDERR_NODC;
4063
4064     if (surface->hDC != dc)
4065     {
4066         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
4067                 dc, surface->hDC);
4068         return WINEDDERR_NODC;
4069     }
4070
4071     /* Copy the contents of the DIB over to the PBO. */
4072     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
4073         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
4074
4075     /* We locked first, so unlock now. */
4076     wined3d_surface_unmap(surface);
4077
4078     surface->flags &= ~SFLAG_DCINUSE;
4079
4080     return WINED3D_OK;
4081 }
4082
4083 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
4084 {
4085     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
4086
4087     if (flags)
4088     {
4089         static UINT once;
4090         if (!once++)
4091             FIXME("Ignoring flags %#x.\n", flags);
4092         else
4093             WARN("Ignoring flags %#x.\n", flags);
4094     }
4095
4096     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4097     {
4098         ERR("Not supported on swapchain surfaces.\n");
4099         return WINEDDERR_NOTFLIPPABLE;
4100     }
4101
4102     /* Flipping is only supported on render targets and overlays. */
4103     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
4104     {
4105         WARN("Tried to flip a non-render target, non-overlay surface.\n");
4106         return WINEDDERR_NOTFLIPPABLE;
4107     }
4108
4109     flip_surface(surface, override);
4110
4111     /* Update overlays if they're visible. */
4112     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
4113         return surface_draw_overlay(surface);
4114
4115     return WINED3D_OK;
4116 }
4117
4118 /* Do not call while under the GL lock. */
4119 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
4120 {
4121     struct wined3d_device *device = surface->resource.device;
4122
4123     TRACE("iface %p, srgb %#x.\n", surface, srgb);
4124
4125     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4126     {
4127         struct wined3d_texture *texture = surface->container.u.texture;
4128
4129         TRACE("Passing to container (%p).\n", texture);
4130         texture->texture_ops->texture_preload(texture, srgb);
4131     }
4132     else
4133     {
4134         struct wined3d_context *context;
4135
4136         TRACE("(%p) : About to load surface\n", surface);
4137
4138         /* TODO: Use already acquired context when possible. */
4139         context = context_acquire(device, NULL);
4140
4141         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
4142
4143         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
4144         {
4145             /* Tell opengl to try and keep this texture in video ram (well mostly) */
4146             GLclampf tmp;
4147             tmp = 0.9f;
4148             ENTER_GL();
4149             glPrioritizeTextures(1, &surface->texture_name, &tmp);
4150             LEAVE_GL();
4151         }
4152
4153         context_release(context);
4154     }
4155 }
4156
4157 /* Read the framebuffer back into the surface */
4158 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4159 {
4160     struct wined3d_device *device = surface->resource.device;
4161     const struct wined3d_gl_info *gl_info;
4162     struct wined3d_context *context;
4163     BYTE *mem;
4164     GLint fmt;
4165     GLint type;
4166     BYTE *row, *top, *bottom;
4167     int i;
4168     BOOL bpp;
4169     RECT local_rect;
4170     BOOL srcIsUpsideDown;
4171     GLint rowLen = 0;
4172     GLint skipPix = 0;
4173     GLint skipRow = 0;
4174
4175     context = context_acquire(device, surface);
4176     context_apply_blit_state(context, device);
4177     gl_info = context->gl_info;
4178
4179     ENTER_GL();
4180
4181     /* Select the correct read buffer, and give some debug output.
4182      * There is no need to keep track of the current read buffer or reset it, every part of the code
4183      * that reads sets the read buffer as desired.
4184      */
4185     if (surface_is_offscreen(surface))
4186     {
4187         /* Mapping the primary render target which is not on a swapchain.
4188          * Read from the back buffer. */
4189         TRACE("Mapping offscreen render target.\n");
4190         glReadBuffer(device->offscreenBuffer);
4191         srcIsUpsideDown = TRUE;
4192     }
4193     else
4194     {
4195         /* Onscreen surfaces are always part of a swapchain */
4196         GLenum buffer = surface_get_gl_buffer(surface);
4197         TRACE("Mapping %#x buffer.\n", buffer);
4198         glReadBuffer(buffer);
4199         checkGLcall("glReadBuffer");
4200         srcIsUpsideDown = FALSE;
4201     }
4202
4203     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4204     if (!rect)
4205     {
4206         local_rect.left = 0;
4207         local_rect.top = 0;
4208         local_rect.right = surface->resource.width;
4209         local_rect.bottom = surface->resource.height;
4210     }
4211     else
4212     {
4213         local_rect = *rect;
4214     }
4215     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4216
4217     switch (surface->resource.format->id)
4218     {
4219         case WINED3DFMT_P8_UINT:
4220         {
4221             if (primary_render_target_is_p8(device))
4222             {
4223                 /* In case of P8 render targets the index is stored in the alpha component */
4224                 fmt = GL_ALPHA;
4225                 type = GL_UNSIGNED_BYTE;
4226                 mem = dest;
4227                 bpp = surface->resource.format->byte_count;
4228             }
4229             else
4230             {
4231                 /* GL can't return palettized data, so read ARGB pixels into a
4232                  * separate block of memory and convert them into palettized format
4233                  * in software. Slow, but if the app means to use palettized render
4234                  * targets and locks it...
4235                  *
4236                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4237                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4238                  * for the color channels when palettizing the colors.
4239                  */
4240                 fmt = GL_RGB;
4241                 type = GL_UNSIGNED_BYTE;
4242                 pitch *= 3;
4243                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4244                 if (!mem)
4245                 {
4246                     ERR("Out of memory\n");
4247                     LEAVE_GL();
4248                     return;
4249                 }
4250                 bpp = surface->resource.format->byte_count * 3;
4251             }
4252         }
4253         break;
4254
4255         default:
4256             mem = dest;
4257             fmt = surface->resource.format->glFormat;
4258             type = surface->resource.format->glType;
4259             bpp = surface->resource.format->byte_count;
4260     }
4261
4262     if (surface->flags & SFLAG_PBO)
4263     {
4264         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4265         checkGLcall("glBindBufferARB");
4266         if (mem)
4267         {
4268             ERR("mem not null for pbo -- unexpected\n");
4269             mem = NULL;
4270         }
4271     }
4272
4273     /* Save old pixel store pack state */
4274     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4275     checkGLcall("glGetIntegerv");
4276     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4277     checkGLcall("glGetIntegerv");
4278     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4279     checkGLcall("glGetIntegerv");
4280
4281     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4282     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4283     checkGLcall("glPixelStorei");
4284     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4285     checkGLcall("glPixelStorei");
4286     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4287     checkGLcall("glPixelStorei");
4288
4289     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4290             local_rect.right - local_rect.left,
4291             local_rect.bottom - local_rect.top,
4292             fmt, type, mem);
4293     checkGLcall("glReadPixels");
4294
4295     /* Reset previous pixel store pack state */
4296     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4297     checkGLcall("glPixelStorei");
4298     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4299     checkGLcall("glPixelStorei");
4300     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4301     checkGLcall("glPixelStorei");
4302
4303     if (surface->flags & SFLAG_PBO)
4304     {
4305         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4306         checkGLcall("glBindBufferARB");
4307
4308         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4309          * to get a pointer to it and perform the flipping in software. This is a lot
4310          * faster than calling glReadPixels for each line. In case we want more speed
4311          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4312         if (!srcIsUpsideDown)
4313         {
4314             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4315             checkGLcall("glBindBufferARB");
4316
4317             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4318             checkGLcall("glMapBufferARB");
4319         }
4320     }
4321
4322     /* TODO: Merge this with the palettization loop below for P8 targets */
4323     if(!srcIsUpsideDown) {
4324         UINT len, off;
4325         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4326             Flip the lines in software */
4327         len = (local_rect.right - local_rect.left) * bpp;
4328         off = local_rect.left * bpp;
4329
4330         row = HeapAlloc(GetProcessHeap(), 0, len);
4331         if(!row) {
4332             ERR("Out of memory\n");
4333             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4334                 HeapFree(GetProcessHeap(), 0, mem);
4335             LEAVE_GL();
4336             return;
4337         }
4338
4339         top = mem + pitch * local_rect.top;
4340         bottom = mem + pitch * (local_rect.bottom - 1);
4341         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4342             memcpy(row, top + off, len);
4343             memcpy(top + off, bottom + off, len);
4344             memcpy(bottom + off, row, len);
4345             top += pitch;
4346             bottom -= pitch;
4347         }
4348         HeapFree(GetProcessHeap(), 0, row);
4349
4350         /* Unmap the temp PBO buffer */
4351         if (surface->flags & SFLAG_PBO)
4352         {
4353             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4354             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4355         }
4356     }
4357
4358     LEAVE_GL();
4359     context_release(context);
4360
4361     /* For P8 textures we need to perform an inverse palette lookup. This is
4362      * done by searching for a palette index which matches the RGB value.
4363      * Note this isn't guaranteed to work when there are multiple entries for
4364      * the same color but we have no choice. In case of P8 render targets,
4365      * the index is stored in the alpha component so no conversion is needed. */
4366     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4367     {
4368         const PALETTEENTRY *pal = NULL;
4369         DWORD width = pitch / 3;
4370         int x, y, c;
4371
4372         if (surface->palette)
4373         {
4374             pal = surface->palette->palents;
4375         }
4376         else
4377         {
4378             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4379             HeapFree(GetProcessHeap(), 0, mem);
4380             return;
4381         }
4382
4383         for(y = local_rect.top; y < local_rect.bottom; y++) {
4384             for(x = local_rect.left; x < local_rect.right; x++) {
4385                 /*                      start              lines            pixels      */
4386                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4387                 const BYTE *green = blue  + 1;
4388                 const BYTE *red = green + 1;
4389
4390                 for(c = 0; c < 256; c++) {
4391                     if(*red   == pal[c].peRed   &&
4392                        *green == pal[c].peGreen &&
4393                        *blue  == pal[c].peBlue)
4394                     {
4395                         *((BYTE *) dest + y * width + x) = c;
4396                         break;
4397                     }
4398                 }
4399             }
4400         }
4401         HeapFree(GetProcessHeap(), 0, mem);
4402     }
4403 }
4404
4405 /* Read the framebuffer contents into a texture. Note that this function
4406  * doesn't do any kind of flipping. Using this on an onscreen surface will
4407  * result in a flipped D3D texture. */
4408 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4409 {
4410     struct wined3d_device *device = surface->resource.device;
4411     struct wined3d_context *context;
4412
4413     context = context_acquire(device, surface);
4414     device_invalidate_state(device, STATE_FRAMEBUFFER);
4415
4416     surface_prepare_texture(surface, context, srgb);
4417     surface_bind_and_dirtify(surface, context, srgb);
4418
4419     TRACE("Reading back offscreen render target %p.\n", surface);
4420
4421     ENTER_GL();
4422
4423     if (surface_is_offscreen(surface))
4424         glReadBuffer(device->offscreenBuffer);
4425     else
4426         glReadBuffer(surface_get_gl_buffer(surface));
4427     checkGLcall("glReadBuffer");
4428
4429     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4430             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4431     checkGLcall("glCopyTexSubImage2D");
4432
4433     LEAVE_GL();
4434
4435     context_release(context);
4436 }
4437
4438 /* Context activation is done by the caller. */
4439 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4440         struct wined3d_context *context, BOOL srgb)
4441 {
4442     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4443     enum wined3d_conversion_type convert;
4444     struct wined3d_format format;
4445
4446     if (surface->flags & alloc_flag) return;
4447
4448     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4449     if (convert != WINED3D_CT_NONE || format.convert)
4450         surface->flags |= SFLAG_CONVERTED;
4451     else surface->flags &= ~SFLAG_CONVERTED;
4452
4453     surface_bind_and_dirtify(surface, context, srgb);
4454     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4455     surface->flags |= alloc_flag;
4456 }
4457
4458 /* Context activation is done by the caller. */
4459 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4460 {
4461     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4462     {
4463         struct wined3d_texture *texture = surface->container.u.texture;
4464         UINT sub_count = texture->level_count * texture->layer_count;
4465         UINT i;
4466
4467         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4468
4469         for (i = 0; i < sub_count; ++i)
4470         {
4471             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4472             surface_prepare_texture_internal(s, context, srgb);
4473         }
4474
4475         return;
4476     }
4477
4478     surface_prepare_texture_internal(surface, context, srgb);
4479 }
4480
4481 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4482 {
4483     if (multisample)
4484     {
4485         if (surface->rb_multisample)
4486             return;
4487
4488         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4489         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4490         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4491                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4492         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4493     }
4494     else
4495     {
4496         if (surface->rb_resolved)
4497             return;
4498
4499         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4500         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4501         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4502                 surface->pow2Width, surface->pow2Height);
4503         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4504     }
4505 }
4506
4507 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4508         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4509 {
4510     struct wined3d_device *device = surface->resource.device;
4511     UINT pitch = wined3d_surface_get_pitch(surface);
4512     const struct wined3d_gl_info *gl_info;
4513     struct wined3d_context *context;
4514     RECT local_rect;
4515     UINT w, h;
4516
4517     surface_get_rect(surface, rect, &local_rect);
4518
4519     mem += local_rect.top * pitch + local_rect.left * bpp;
4520     w = local_rect.right - local_rect.left;
4521     h = local_rect.bottom - local_rect.top;
4522
4523     /* Activate the correct context for the render target */
4524     context = context_acquire(device, surface);
4525     context_apply_blit_state(context, device);
4526     gl_info = context->gl_info;
4527
4528     ENTER_GL();
4529
4530     if (!surface_is_offscreen(surface))
4531     {
4532         GLenum buffer = surface_get_gl_buffer(surface);
4533         TRACE("Unlocking %#x buffer.\n", buffer);
4534         context_set_draw_buffer(context, buffer);
4535
4536         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4537         glPixelZoom(1.0f, -1.0f);
4538     }
4539     else
4540     {
4541         /* Primary offscreen render target */
4542         TRACE("Offscreen render target.\n");
4543         context_set_draw_buffer(context, device->offscreenBuffer);
4544
4545         glPixelZoom(1.0f, 1.0f);
4546     }
4547
4548     glRasterPos3i(local_rect.left, local_rect.top, 1);
4549     checkGLcall("glRasterPos3i");
4550
4551     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4552     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4553
4554     if (surface->flags & SFLAG_PBO)
4555     {
4556         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4557         checkGLcall("glBindBufferARB");
4558     }
4559
4560     glDrawPixels(w, h, fmt, type, mem);
4561     checkGLcall("glDrawPixels");
4562
4563     if (surface->flags & SFLAG_PBO)
4564     {
4565         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4566         checkGLcall("glBindBufferARB");
4567     }
4568
4569     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4570     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4571
4572     LEAVE_GL();
4573
4574     if (wined3d_settings.strict_draw_ordering
4575             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4576             && surface->container.u.swapchain->front_buffer == surface))
4577         wglFlush();
4578
4579     context_release(context);
4580 }
4581
4582 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4583 {
4584     /* FIXME: Is this really how color keys are supposed to work? I think it
4585      * makes more sense to compare the individual channels. */
4586     return color >= color_key->color_space_low_value
4587             && color <= color_key->color_space_high_value;
4588 }
4589
4590 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4591 {
4592     const struct wined3d_device *device = surface->resource.device;
4593     const struct wined3d_palette *pal = surface->palette;
4594     BOOL index_in_alpha = FALSE;
4595     unsigned int i;
4596
4597     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4598      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4599      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4600      * duplicate entries. Store the color key in the unused alpha component to speed the
4601      * download up and to make conversion unneeded. */
4602     index_in_alpha = primary_render_target_is_p8(device);
4603
4604     if (!pal)
4605     {
4606         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4607         if (index_in_alpha)
4608         {
4609             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4610              * there's no palette at this time. */
4611             for (i = 0; i < 256; i++) table[i][3] = i;
4612         }
4613     }
4614     else
4615     {
4616         TRACE("Using surface palette %p\n", pal);
4617         /* Get the surface's palette */
4618         for (i = 0; i < 256; ++i)
4619         {
4620             table[i][0] = pal->palents[i].peRed;
4621             table[i][1] = pal->palents[i].peGreen;
4622             table[i][2] = pal->palents[i].peBlue;
4623
4624             /* When index_in_alpha is set the palette index is stored in the
4625              * alpha component. In case of a readback we can then read
4626              * GL_ALPHA. Color keying is handled in BltOverride using a
4627              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4628              * color key itself is passed to glAlphaFunc in other cases the
4629              * alpha component of pixels that should be masked away is set to 0. */
4630             if (index_in_alpha)
4631                 table[i][3] = i;
4632             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4633                 table[i][3] = 0x00;
4634             else if (pal->flags & WINEDDPCAPS_ALPHA)
4635                 table[i][3] = pal->palents[i].peFlags;
4636             else
4637                 table[i][3] = 0xFF;
4638         }
4639     }
4640 }
4641
4642 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width, UINT height,
4643         UINT outpitch, enum wined3d_conversion_type conversion_type, struct wined3d_surface *surface)
4644 {
4645     const BYTE *source;
4646     BYTE *dest;
4647
4648     TRACE("src %p, dst %p, pitch %u, width %u, height %u, outpitch %u, conversion_type %#x, surface %p.\n",
4649             src, dst, pitch, width, height, outpitch, conversion_type, surface);
4650
4651     switch (conversion_type)
4652     {
4653         case WINED3D_CT_NONE:
4654         {
4655             memcpy(dst, src, pitch * height);
4656             break;
4657         }
4658
4659         case WINED3D_CT_PALETTED:
4660         case WINED3D_CT_PALETTED_CK:
4661         {
4662             BYTE table[256][4];
4663             unsigned int x, y;
4664
4665             d3dfmt_p8_init_palette(surface, table, (conversion_type == WINED3D_CT_PALETTED_CK));
4666
4667             for (y = 0; y < height; y++)
4668             {
4669                 source = src + pitch * y;
4670                 dest = dst + outpitch * y;
4671                 /* This is an 1 bpp format, using the width here is fine */
4672                 for (x = 0; x < width; x++) {
4673                     BYTE color = *source++;
4674                     *dest++ = table[color][0];
4675                     *dest++ = table[color][1];
4676                     *dest++ = table[color][2];
4677                     *dest++ = table[color][3];
4678                 }
4679             }
4680         }
4681         break;
4682
4683         case WINED3D_CT_CK_565:
4684         {
4685             /* Converting the 565 format in 5551 packed to emulate color-keying.
4686
4687               Note : in all these conversion, it would be best to average the averaging
4688                       pixels to get the color of the pixel that will be color-keyed to
4689                       prevent 'color bleeding'. This will be done later on if ever it is
4690                       too visible.
4691
4692               Note2: Nvidia documents say that their driver does not support alpha + color keying
4693                      on the same surface and disables color keying in such a case
4694             */
4695             unsigned int x, y;
4696             const WORD *Source;
4697             WORD *Dest;
4698
4699             TRACE("Color keyed 565\n");
4700
4701             for (y = 0; y < height; y++) {
4702                 Source = (const WORD *)(src + y * pitch);
4703                 Dest = (WORD *) (dst + y * outpitch);
4704                 for (x = 0; x < width; x++ ) {
4705                     WORD color = *Source++;
4706                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4707                     if (!color_in_range(&surface->src_blt_color_key, color))
4708                         *Dest |= 0x0001;
4709                     Dest++;
4710                 }
4711             }
4712         }
4713         break;
4714
4715         case WINED3D_CT_CK_5551:
4716         {
4717             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4718             unsigned int x, y;
4719             const WORD *Source;
4720             WORD *Dest;
4721             TRACE("Color keyed 5551\n");
4722             for (y = 0; y < height; y++) {
4723                 Source = (const WORD *)(src + y * pitch);
4724                 Dest = (WORD *) (dst + y * outpitch);
4725                 for (x = 0; x < width; x++ ) {
4726                     WORD color = *Source++;
4727                     *Dest = color;
4728                     if (!color_in_range(&surface->src_blt_color_key, color))
4729                         *Dest |= (1 << 15);
4730                     else
4731                         *Dest &= ~(1 << 15);
4732                     Dest++;
4733                 }
4734             }
4735         }
4736         break;
4737
4738         case WINED3D_CT_CK_RGB24:
4739         {
4740             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4741             unsigned int x, y;
4742             for (y = 0; y < height; y++)
4743             {
4744                 source = src + pitch * y;
4745                 dest = dst + outpitch * y;
4746                 for (x = 0; x < width; x++) {
4747                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4748                     DWORD dstcolor = color << 8;
4749                     if (!color_in_range(&surface->src_blt_color_key, color))
4750                         dstcolor |= 0xff;
4751                     *(DWORD*)dest = dstcolor;
4752                     source += 3;
4753                     dest += 4;
4754                 }
4755             }
4756         }
4757         break;
4758
4759         case WINED3D_CT_RGB32_888:
4760         {
4761             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4762             unsigned int x, y;
4763             for (y = 0; y < height; y++)
4764             {
4765                 source = src + pitch * y;
4766                 dest = dst + outpitch * y;
4767                 for (x = 0; x < width; x++) {
4768                     DWORD color = 0xffffff & *(const DWORD*)source;
4769                     DWORD dstcolor = color << 8;
4770                     if (!color_in_range(&surface->src_blt_color_key, color))
4771                         dstcolor |= 0xff;
4772                     *(DWORD*)dest = dstcolor;
4773                     source += 4;
4774                     dest += 4;
4775                 }
4776             }
4777         }
4778         break;
4779
4780         case WINED3D_CT_CK_ARGB32:
4781         {
4782             unsigned int x, y;
4783             for (y = 0; y < height; ++y)
4784             {
4785                 source = src + pitch * y;
4786                 dest = dst + outpitch * y;
4787                 for (x = 0; x < width; ++x)
4788                 {
4789                     DWORD color = *(const DWORD *)source;
4790                     if (color_in_range(&surface->src_blt_color_key, color))
4791                         color &= ~0xff000000;
4792                     *(DWORD*)dest = color;
4793                     source += 4;
4794                     dest += 4;
4795                 }
4796             }
4797         }
4798         break;
4799
4800         default:
4801             ERR("Unsupported conversion type %#x.\n", conversion_type);
4802     }
4803     return WINED3D_OK;
4804 }
4805
4806 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4807 {
4808     /* Flip the surface contents */
4809     /* Flip the DC */
4810     {
4811         HDC tmp;
4812         tmp = front->hDC;
4813         front->hDC = back->hDC;
4814         back->hDC = tmp;
4815     }
4816
4817     /* Flip the DIBsection */
4818     {
4819         HBITMAP tmp = front->dib.DIBsection;
4820         front->dib.DIBsection = back->dib.DIBsection;
4821         back->dib.DIBsection = tmp;
4822     }
4823
4824     /* Flip the surface data */
4825     {
4826         void* tmp;
4827
4828         tmp = front->dib.bitmap_data;
4829         front->dib.bitmap_data = back->dib.bitmap_data;
4830         back->dib.bitmap_data = tmp;
4831
4832         tmp = front->resource.allocatedMemory;
4833         front->resource.allocatedMemory = back->resource.allocatedMemory;
4834         back->resource.allocatedMemory = tmp;
4835
4836         tmp = front->resource.heapMemory;
4837         front->resource.heapMemory = back->resource.heapMemory;
4838         back->resource.heapMemory = tmp;
4839     }
4840
4841     /* Flip the PBO */
4842     {
4843         GLuint tmp_pbo = front->pbo;
4844         front->pbo = back->pbo;
4845         back->pbo = tmp_pbo;
4846     }
4847
4848     /* Flip the opengl texture */
4849     {
4850         GLuint tmp;
4851
4852         tmp = back->texture_name;
4853         back->texture_name = front->texture_name;
4854         front->texture_name = tmp;
4855
4856         tmp = back->texture_name_srgb;
4857         back->texture_name_srgb = front->texture_name_srgb;
4858         front->texture_name_srgb = tmp;
4859
4860         tmp = back->rb_multisample;
4861         back->rb_multisample = front->rb_multisample;
4862         front->rb_multisample = tmp;
4863
4864         tmp = back->rb_resolved;
4865         back->rb_resolved = front->rb_resolved;
4866         front->rb_resolved = tmp;
4867
4868         resource_unload(&back->resource);
4869         resource_unload(&front->resource);
4870     }
4871
4872     {
4873         DWORD tmp_flags = back->flags;
4874         back->flags = front->flags;
4875         front->flags = tmp_flags;
4876     }
4877 }
4878
4879 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4880  * pixel copy calls. */
4881 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4882         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4883 {
4884     struct wined3d_device *device = dst_surface->resource.device;
4885     float xrel, yrel;
4886     UINT row;
4887     struct wined3d_context *context;
4888     BOOL upsidedown = FALSE;
4889     RECT dst_rect = *dst_rect_in;
4890
4891     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4892      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4893      */
4894     if(dst_rect.top > dst_rect.bottom) {
4895         UINT tmp = dst_rect.bottom;
4896         dst_rect.bottom = dst_rect.top;
4897         dst_rect.top = tmp;
4898         upsidedown = TRUE;
4899     }
4900
4901     context = context_acquire(device, src_surface);
4902     context_apply_blit_state(context, device);
4903     surface_internal_preload(dst_surface, SRGB_RGB);
4904     ENTER_GL();
4905
4906     /* Bind the target texture */
4907     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4908     if (surface_is_offscreen(src_surface))
4909     {
4910         TRACE("Reading from an offscreen target\n");
4911         upsidedown = !upsidedown;
4912         glReadBuffer(device->offscreenBuffer);
4913     }
4914     else
4915     {
4916         glReadBuffer(surface_get_gl_buffer(src_surface));
4917     }
4918     checkGLcall("glReadBuffer");
4919
4920     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4921     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4922
4923     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4924     {
4925         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4926
4927         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4928             ERR("Texture filtering not supported in direct blit.\n");
4929     }
4930     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4931             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4932     {
4933         ERR("Texture filtering not supported in direct blit\n");
4934     }
4935
4936     if (upsidedown
4937             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4938             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4939     {
4940         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4941
4942         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4943                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4944                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4945                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4946     }
4947     else
4948     {
4949         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4950         /* I have to process this row by row to swap the image,
4951          * otherwise it would be upside down, so stretching in y direction
4952          * doesn't cost extra time
4953          *
4954          * However, stretching in x direction can be avoided if not necessary
4955          */
4956         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4957             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4958             {
4959                 /* Well, that stuff works, but it's very slow.
4960                  * find a better way instead
4961                  */
4962                 UINT col;
4963
4964                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4965                 {
4966                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4967                             dst_rect.left + col /* x offset */, row /* y offset */,
4968                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4969                 }
4970             }
4971             else
4972             {
4973                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4974                         dst_rect.left /* x offset */, row /* y offset */,
4975                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4976             }
4977         }
4978     }
4979     checkGLcall("glCopyTexSubImage2D");
4980
4981     LEAVE_GL();
4982     context_release(context);
4983
4984     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4985      * path is never entered
4986      */
4987     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4988 }
4989
4990 /* Uses the hardware to stretch and flip the image */
4991 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4992         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4993 {
4994     struct wined3d_device *device = dst_surface->resource.device;
4995     struct wined3d_swapchain *src_swapchain = NULL;
4996     GLuint src, backup = 0;
4997     float left, right, top, bottom; /* Texture coordinates */
4998     UINT fbwidth = src_surface->resource.width;
4999     UINT fbheight = src_surface->resource.height;
5000     struct wined3d_context *context;
5001     GLenum drawBuffer = GL_BACK;
5002     GLenum texture_target;
5003     BOOL noBackBufferBackup;
5004     BOOL src_offscreen;
5005     BOOL upsidedown = FALSE;
5006     RECT dst_rect = *dst_rect_in;
5007
5008     TRACE("Using hwstretch blit\n");
5009     /* Activate the Proper context for reading from the source surface, set it up for blitting */
5010     context = context_acquire(device, src_surface);
5011     context_apply_blit_state(context, device);
5012     surface_internal_preload(dst_surface, SRGB_RGB);
5013
5014     src_offscreen = surface_is_offscreen(src_surface);
5015     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
5016     if (!noBackBufferBackup && !src_surface->texture_name)
5017     {
5018         /* Get it a description */
5019         surface_internal_preload(src_surface, SRGB_RGB);
5020     }
5021     ENTER_GL();
5022
5023     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
5024      * This way we don't have to wait for the 2nd readback to finish to leave this function.
5025      */
5026     if (context->aux_buffers >= 2)
5027     {
5028         /* Got more than one aux buffer? Use the 2nd aux buffer */
5029         drawBuffer = GL_AUX1;
5030     }
5031     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
5032     {
5033         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5034         drawBuffer = GL_AUX0;
5035     }
5036
5037     if(noBackBufferBackup) {
5038         glGenTextures(1, &backup);
5039         checkGLcall("glGenTextures");
5040         context_bind_texture(context, GL_TEXTURE_2D, backup);
5041         texture_target = GL_TEXTURE_2D;
5042     } else {
5043         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5044          * we are reading from the back buffer, the backup can be used as source texture
5045          */
5046         texture_target = src_surface->texture_target;
5047         context_bind_texture(context, texture_target, src_surface->texture_name);
5048         glEnable(texture_target);
5049         checkGLcall("glEnable(texture_target)");
5050
5051         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5052         src_surface->flags &= ~SFLAG_INTEXTURE;
5053     }
5054
5055     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5056      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5057      */
5058     if(dst_rect.top > dst_rect.bottom) {
5059         UINT tmp = dst_rect.bottom;
5060         dst_rect.bottom = dst_rect.top;
5061         dst_rect.top = tmp;
5062         upsidedown = TRUE;
5063     }
5064
5065     if (src_offscreen)
5066     {
5067         TRACE("Reading from an offscreen target\n");
5068         upsidedown = !upsidedown;
5069         glReadBuffer(device->offscreenBuffer);
5070     }
5071     else
5072     {
5073         glReadBuffer(surface_get_gl_buffer(src_surface));
5074     }
5075
5076     /* TODO: Only back up the part that will be overwritten */
5077     glCopyTexSubImage2D(texture_target, 0,
5078                         0, 0 /* read offsets */,
5079                         0, 0,
5080                         fbwidth,
5081                         fbheight);
5082
5083     checkGLcall("glCopyTexSubImage2D");
5084
5085     /* No issue with overriding these - the sampler is dirty due to blit usage */
5086     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5087             wined3d_gl_mag_filter(magLookup, filter));
5088     checkGLcall("glTexParameteri");
5089     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5090             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5091     checkGLcall("glTexParameteri");
5092
5093     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5094         src_swapchain = src_surface->container.u.swapchain;
5095     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5096     {
5097         src = backup ? backup : src_surface->texture_name;
5098     }
5099     else
5100     {
5101         glReadBuffer(GL_FRONT);
5102         checkGLcall("glReadBuffer(GL_FRONT)");
5103
5104         glGenTextures(1, &src);
5105         checkGLcall("glGenTextures(1, &src)");
5106         context_bind_texture(context, GL_TEXTURE_2D, src);
5107
5108         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5109          * out for power of 2 sizes
5110          */
5111         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5112                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5113         checkGLcall("glTexImage2D");
5114         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5115                             0, 0 /* read offsets */,
5116                             0, 0,
5117                             fbwidth,
5118                             fbheight);
5119
5120         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5121         checkGLcall("glTexParameteri");
5122         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5123         checkGLcall("glTexParameteri");
5124
5125         glReadBuffer(GL_BACK);
5126         checkGLcall("glReadBuffer(GL_BACK)");
5127
5128         if(texture_target != GL_TEXTURE_2D) {
5129             glDisable(texture_target);
5130             glEnable(GL_TEXTURE_2D);
5131             texture_target = GL_TEXTURE_2D;
5132         }
5133     }
5134     checkGLcall("glEnd and previous");
5135
5136     left = src_rect->left;
5137     right = src_rect->right;
5138
5139     if (!upsidedown)
5140     {
5141         top = src_surface->resource.height - src_rect->top;
5142         bottom = src_surface->resource.height - src_rect->bottom;
5143     }
5144     else
5145     {
5146         top = src_surface->resource.height - src_rect->bottom;
5147         bottom = src_surface->resource.height - src_rect->top;
5148     }
5149
5150     if (src_surface->flags & SFLAG_NORMCOORD)
5151     {
5152         left /= src_surface->pow2Width;
5153         right /= src_surface->pow2Width;
5154         top /= src_surface->pow2Height;
5155         bottom /= src_surface->pow2Height;
5156     }
5157
5158     /* draw the source texture stretched and upside down. The correct surface is bound already */
5159     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5160     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5161
5162     context_set_draw_buffer(context, drawBuffer);
5163     glReadBuffer(drawBuffer);
5164
5165     glBegin(GL_QUADS);
5166         /* bottom left */
5167         glTexCoord2f(left, bottom);
5168         glVertex2i(0, 0);
5169
5170         /* top left */
5171         glTexCoord2f(left, top);
5172         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5173
5174         /* top right */
5175         glTexCoord2f(right, top);
5176         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5177
5178         /* bottom right */
5179         glTexCoord2f(right, bottom);
5180         glVertex2i(dst_rect.right - dst_rect.left, 0);
5181     glEnd();
5182     checkGLcall("glEnd and previous");
5183
5184     if (texture_target != dst_surface->texture_target)
5185     {
5186         glDisable(texture_target);
5187         glEnable(dst_surface->texture_target);
5188         texture_target = dst_surface->texture_target;
5189     }
5190
5191     /* Now read the stretched and upside down image into the destination texture */
5192     context_bind_texture(context, texture_target, dst_surface->texture_name);
5193     glCopyTexSubImage2D(texture_target,
5194                         0,
5195                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5196                         0, 0, /* We blitted the image to the origin */
5197                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5198     checkGLcall("glCopyTexSubImage2D");
5199
5200     if(drawBuffer == GL_BACK) {
5201         /* Write the back buffer backup back */
5202         if(backup) {
5203             if(texture_target != GL_TEXTURE_2D) {
5204                 glDisable(texture_target);
5205                 glEnable(GL_TEXTURE_2D);
5206                 texture_target = GL_TEXTURE_2D;
5207             }
5208             context_bind_texture(context, GL_TEXTURE_2D, backup);
5209         }
5210         else
5211         {
5212             if (texture_target != src_surface->texture_target)
5213             {
5214                 glDisable(texture_target);
5215                 glEnable(src_surface->texture_target);
5216                 texture_target = src_surface->texture_target;
5217             }
5218             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5219         }
5220
5221         glBegin(GL_QUADS);
5222             /* top left */
5223             glTexCoord2f(0.0f, 0.0f);
5224             glVertex2i(0, fbheight);
5225
5226             /* bottom left */
5227             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5228             glVertex2i(0, 0);
5229
5230             /* bottom right */
5231             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5232                     (float)fbheight / (float)src_surface->pow2Height);
5233             glVertex2i(fbwidth, 0);
5234
5235             /* top right */
5236             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5237             glVertex2i(fbwidth, fbheight);
5238         glEnd();
5239     }
5240     glDisable(texture_target);
5241     checkGLcall("glDisable(texture_target)");
5242
5243     /* Cleanup */
5244     if (src != src_surface->texture_name && src != backup)
5245     {
5246         glDeleteTextures(1, &src);
5247         checkGLcall("glDeleteTextures(1, &src)");
5248     }
5249     if(backup) {
5250         glDeleteTextures(1, &backup);
5251         checkGLcall("glDeleteTextures(1, &backup)");
5252     }
5253
5254     LEAVE_GL();
5255
5256     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5257
5258     context_release(context);
5259
5260     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5261      * path is never entered
5262      */
5263     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5264 }
5265
5266 /* Front buffer coordinates are always full screen coordinates, but our GL
5267  * drawable is limited to the window's client area. The sysmem and texture
5268  * copies do have the full screen size. Note that GL has a bottom-left
5269  * origin, while D3D has a top-left origin. */
5270 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5271 {
5272     UINT drawable_height;
5273
5274     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5275             && surface == surface->container.u.swapchain->front_buffer)
5276     {
5277         POINT offset = {0, 0};
5278         RECT windowsize;
5279
5280         ScreenToClient(window, &offset);
5281         OffsetRect(rect, offset.x, offset.y);
5282
5283         GetClientRect(window, &windowsize);
5284         drawable_height = windowsize.bottom - windowsize.top;
5285     }
5286     else
5287     {
5288         drawable_height = surface->resource.height;
5289     }
5290
5291     rect->top = drawable_height - rect->top;
5292     rect->bottom = drawable_height - rect->bottom;
5293 }
5294
5295 static void surface_blt_to_drawable(const struct wined3d_device *device,
5296         enum wined3d_texture_filter_type filter, BOOL color_key,
5297         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5298         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5299 {
5300     struct wined3d_context *context;
5301     RECT src_rect, dst_rect;
5302
5303     src_rect = *src_rect_in;
5304     dst_rect = *dst_rect_in;
5305
5306     /* Make sure the surface is up-to-date. This should probably use
5307      * surface_load_location() and worry about the destination surface too,
5308      * unless we're overwriting it completely. */
5309     surface_internal_preload(src_surface, SRGB_RGB);
5310
5311     /* Activate the destination context, set it up for blitting */
5312     context = context_acquire(device, dst_surface);
5313     context_apply_blit_state(context, device);
5314
5315     if (!surface_is_offscreen(dst_surface))
5316         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5317
5318     device->blitter->set_shader(device->blit_priv, context, src_surface);
5319
5320     ENTER_GL();
5321
5322     if (color_key)
5323     {
5324         glEnable(GL_ALPHA_TEST);
5325         checkGLcall("glEnable(GL_ALPHA_TEST)");
5326
5327         /* When the primary render target uses P8, the alpha component
5328          * contains the palette index. Which means that the colorkey is one of
5329          * the palette entries. In other cases pixels that should be masked
5330          * away have alpha set to 0. */
5331         if (primary_render_target_is_p8(device))
5332             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5333         else
5334             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5335         checkGLcall("glAlphaFunc");
5336     }
5337     else
5338     {
5339         glDisable(GL_ALPHA_TEST);
5340         checkGLcall("glDisable(GL_ALPHA_TEST)");
5341     }
5342
5343     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5344
5345     if (color_key)
5346     {
5347         glDisable(GL_ALPHA_TEST);
5348         checkGLcall("glDisable(GL_ALPHA_TEST)");
5349     }
5350
5351     LEAVE_GL();
5352
5353     /* Leave the opengl state valid for blitting */
5354     device->blitter->unset_shader(context->gl_info);
5355
5356     if (wined3d_settings.strict_draw_ordering
5357             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5358             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5359         wglFlush(); /* Flush to ensure ordering across contexts. */
5360
5361     context_release(context);
5362 }
5363
5364 /* Do not call while under the GL lock. */
5365 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5366 {
5367     struct wined3d_device *device = s->resource.device;
5368     const struct blit_shader *blitter;
5369
5370     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5371             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5372     if (!blitter)
5373     {
5374         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5375         return WINED3DERR_INVALIDCALL;
5376     }
5377
5378     return blitter->color_fill(device, s, rect, color);
5379 }
5380
5381 /* Do not call while under the GL lock. */
5382 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5383         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5384         enum wined3d_texture_filter_type filter)
5385 {
5386     struct wined3d_device *device = dst_surface->resource.device;
5387     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5388     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5389
5390     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5391             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5392             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5393
5394     /* Get the swapchain. One of the surfaces has to be a primary surface */
5395     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5396     {
5397         WARN("Destination is in sysmem, rejecting gl blt\n");
5398         return WINED3DERR_INVALIDCALL;
5399     }
5400
5401     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5402         dstSwapchain = dst_surface->container.u.swapchain;
5403
5404     if (src_surface)
5405     {
5406         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5407         {
5408             WARN("Src is in sysmem, rejecting gl blt\n");
5409             return WINED3DERR_INVALIDCALL;
5410         }
5411
5412         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5413             srcSwapchain = src_surface->container.u.swapchain;
5414     }
5415
5416     /* Early sort out of cases where no render target is used */
5417     if (!dstSwapchain && !srcSwapchain
5418             && src_surface != device->fb.render_targets[0]
5419             && dst_surface != device->fb.render_targets[0])
5420     {
5421         TRACE("No surface is render target, not using hardware blit.\n");
5422         return WINED3DERR_INVALIDCALL;
5423     }
5424
5425     /* No destination color keying supported */
5426     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5427     {
5428         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5429         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5430         return WINED3DERR_INVALIDCALL;
5431     }
5432
5433     if (dstSwapchain && dstSwapchain == srcSwapchain)
5434     {
5435         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5436         return WINED3DERR_INVALIDCALL;
5437     }
5438
5439     if (dstSwapchain && srcSwapchain)
5440     {
5441         FIXME("Implement hardware blit between two different swapchains\n");
5442         return WINED3DERR_INVALIDCALL;
5443     }
5444
5445     if (dstSwapchain)
5446     {
5447         /* Handled with regular texture -> swapchain blit */
5448         if (src_surface == device->fb.render_targets[0])
5449             TRACE("Blit from active render target to a swapchain\n");
5450     }
5451     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5452     {
5453         FIXME("Implement blit from a swapchain to the active render target\n");
5454         return WINED3DERR_INVALIDCALL;
5455     }
5456
5457     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5458     {
5459         /* Blit from render target to texture */
5460         BOOL stretchx;
5461
5462         /* P8 read back is not implemented */
5463         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5464                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5465         {
5466             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5467             return WINED3DERR_INVALIDCALL;
5468         }
5469
5470         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5471         {
5472             TRACE("Color keying not supported by frame buffer to texture blit\n");
5473             return WINED3DERR_INVALIDCALL;
5474             /* Destination color key is checked above */
5475         }
5476
5477         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5478             stretchx = TRUE;
5479         else
5480             stretchx = FALSE;
5481
5482         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5483          * flip the image nor scale it.
5484          *
5485          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5486          * -> If the app wants a image width an unscaled width, copy it line per line
5487          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5488          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5489          *    back buffer. This is slower than reading line per line, thus not used for flipping
5490          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5491          *    pixel by pixel. */
5492         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5493                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5494         {
5495             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5496             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5497         }
5498         else
5499         {
5500             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5501             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5502         }
5503
5504         if (!dst_surface->resource.map_count && !(dst_surface->flags & SFLAG_DONOTFREE))
5505         {
5506             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5507             dst_surface->resource.allocatedMemory = NULL;
5508             dst_surface->resource.heapMemory = NULL;
5509         }
5510         else
5511         {
5512             dst_surface->flags &= ~SFLAG_INSYSMEM;
5513         }
5514
5515         return WINED3D_OK;
5516     }
5517     else if (src_surface)
5518     {
5519         /* Blit from offscreen surface to render target */
5520         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5521         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5522
5523         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5524
5525         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5526                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5527                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5528         {
5529             FIXME("Unsupported blit operation falling back to software\n");
5530             return WINED3DERR_INVALIDCALL;
5531         }
5532
5533         /* Color keying: Check if we have to do a color keyed blt,
5534          * and if not check if a color key is activated.
5535          *
5536          * Just modify the color keying parameters in the surface and restore them afterwards
5537          * The surface keeps track of the color key last used to load the opengl surface.
5538          * PreLoad will catch the change to the flags and color key and reload if necessary.
5539          */
5540         if (flags & WINEDDBLT_KEYSRC)
5541         {
5542             /* Use color key from surface */
5543         }
5544         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5545         {
5546             /* Use color key from DDBltFx */
5547             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5548             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5549         }
5550         else
5551         {
5552             /* Do not use color key */
5553             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5554         }
5555
5556         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5557                 src_surface, src_rect, dst_surface, dst_rect);
5558
5559         /* Restore the color key parameters */
5560         src_surface->CKeyFlags = oldCKeyFlags;
5561         src_surface->src_blt_color_key = old_blt_key;
5562
5563         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5564
5565         return WINED3D_OK;
5566     }
5567
5568     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5569     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5570     return WINED3DERR_INVALIDCALL;
5571 }
5572
5573 /* GL locking is done by the caller */
5574 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5575         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5576 {
5577     struct wined3d_device *device = surface->resource.device;
5578     const struct wined3d_gl_info *gl_info = context->gl_info;
5579     GLint compare_mode = GL_NONE;
5580     struct blt_info info;
5581     GLint old_binding = 0;
5582     RECT rect;
5583
5584     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5585
5586     glDisable(GL_CULL_FACE);
5587     glDisable(GL_BLEND);
5588     glDisable(GL_ALPHA_TEST);
5589     glDisable(GL_SCISSOR_TEST);
5590     glDisable(GL_STENCIL_TEST);
5591     glEnable(GL_DEPTH_TEST);
5592     glDepthFunc(GL_ALWAYS);
5593     glDepthMask(GL_TRUE);
5594     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5595     glViewport(x, y, w, h);
5596     glDepthRange(0.0, 1.0);
5597
5598     SetRect(&rect, 0, h, w, 0);
5599     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5600     context_active_texture(context, context->gl_info, 0);
5601     glGetIntegerv(info.binding, &old_binding);
5602     glBindTexture(info.bind_target, texture);
5603     if (gl_info->supported[ARB_SHADOW])
5604     {
5605         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5606         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5607     }
5608
5609     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5610             gl_info, info.tex_type, &surface->ds_current_size);
5611
5612     glBegin(GL_TRIANGLE_STRIP);
5613     glTexCoord3fv(info.coords[0]);
5614     glVertex2f(-1.0f, -1.0f);
5615     glTexCoord3fv(info.coords[1]);
5616     glVertex2f(1.0f, -1.0f);
5617     glTexCoord3fv(info.coords[2]);
5618     glVertex2f(-1.0f, 1.0f);
5619     glTexCoord3fv(info.coords[3]);
5620     glVertex2f(1.0f, 1.0f);
5621     glEnd();
5622
5623     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5624     glBindTexture(info.bind_target, old_binding);
5625
5626     glPopAttrib();
5627
5628     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5629 }
5630
5631 void surface_modify_ds_location(struct wined3d_surface *surface,
5632         DWORD location, UINT w, UINT h)
5633 {
5634     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5635
5636     if (location & ~(SFLAG_LOCATIONS | SFLAG_DISCARDED))
5637         FIXME("Invalid location (%#x) specified.\n", location);
5638
5639     if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5640             || (!(surface->flags & SFLAG_INTEXTURE) && (location & SFLAG_INTEXTURE)))
5641     {
5642         if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5643         {
5644             TRACE("Passing to container.\n");
5645             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5646         }
5647     }
5648
5649     surface->ds_current_size.cx = w;
5650     surface->ds_current_size.cy = h;
5651     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_DISCARDED);
5652     surface->flags |= location;
5653 }
5654
5655 /* Context activation is done by the caller. */
5656 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5657 {
5658     struct wined3d_device *device = surface->resource.device;
5659     GLsizei w, h;
5660
5661     TRACE("surface %p, new location %#x.\n", surface, location);
5662
5663     /* TODO: Make this work for modes other than FBO */
5664     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5665
5666     if (!(surface->flags & location))
5667     {
5668         w = surface->ds_current_size.cx;
5669         h = surface->ds_current_size.cy;
5670         surface->ds_current_size.cx = 0;
5671         surface->ds_current_size.cy = 0;
5672     }
5673     else
5674     {
5675         w = surface->resource.width;
5676         h = surface->resource.height;
5677     }
5678
5679     if (surface->ds_current_size.cx == surface->resource.width
5680             && surface->ds_current_size.cy == surface->resource.height)
5681     {
5682         TRACE("Location (%#x) is already up to date.\n", location);
5683         return;
5684     }
5685
5686     if (surface->current_renderbuffer)
5687     {
5688         FIXME("Not supported with fixed up depth stencil.\n");
5689         return;
5690     }
5691
5692     if (surface->flags & SFLAG_DISCARDED)
5693     {
5694         TRACE("Surface was discarded, no need copy data.\n");
5695         switch (location)
5696         {
5697             case SFLAG_INTEXTURE:
5698                 surface_prepare_texture(surface, context, FALSE);
5699                 break;
5700             case SFLAG_INRB_MULTISAMPLE:
5701                 surface_prepare_rb(surface, context->gl_info, TRUE);
5702                 break;
5703             case SFLAG_INDRAWABLE:
5704                 /* Nothing to do */
5705                 break;
5706             default:
5707                 FIXME("Unhandled location %#x\n", location);
5708         }
5709         surface->flags &= ~SFLAG_DISCARDED;
5710         surface->flags |= location;
5711         surface->ds_current_size.cx = surface->resource.width;
5712         surface->ds_current_size.cy = surface->resource.height;
5713         return;
5714     }
5715
5716     if (!(surface->flags & SFLAG_LOCATIONS))
5717     {
5718         FIXME("No up to date depth stencil location.\n");
5719         surface->flags |= location;
5720         surface->ds_current_size.cx = surface->resource.width;
5721         surface->ds_current_size.cy = surface->resource.height;
5722         return;
5723     }
5724
5725     if (location == SFLAG_INTEXTURE)
5726     {
5727         GLint old_binding = 0;
5728         GLenum bind_target;
5729
5730         /* The render target is allowed to be smaller than the depth/stencil
5731          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5732          * than the offscreen surface. Don't overwrite the offscreen surface
5733          * with undefined data. */
5734         w = min(w, context->swapchain->desc.backbuffer_width);
5735         h = min(h, context->swapchain->desc.backbuffer_height);
5736
5737         TRACE("Copying onscreen depth buffer to depth texture.\n");
5738
5739         ENTER_GL();
5740
5741         if (!device->depth_blt_texture)
5742         {
5743             glGenTextures(1, &device->depth_blt_texture);
5744         }
5745
5746         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5747          * directly on the FBO texture. That's because we need to flip. */
5748         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5749                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5750         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5751         {
5752             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5753             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5754         }
5755         else
5756         {
5757             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5758             bind_target = GL_TEXTURE_2D;
5759         }
5760         glBindTexture(bind_target, device->depth_blt_texture);
5761         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5762          * internal format, because the internal format might include stencil
5763          * data. In principle we should copy stencil data as well, but unless
5764          * the driver supports stencil export it's hard to do, and doesn't
5765          * seem to be needed in practice. If the hardware doesn't support
5766          * writing stencil data, the glCopyTexImage2D() call might trigger
5767          * software fallbacks. */
5768         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5769         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5770         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5771         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5772         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5773         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5774         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5775         glBindTexture(bind_target, old_binding);
5776
5777         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5778                 NULL, surface, SFLAG_INTEXTURE);
5779         context_set_draw_buffer(context, GL_NONE);
5780         glReadBuffer(GL_NONE);
5781
5782         /* Do the actual blit */
5783         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5784         checkGLcall("depth_blt");
5785
5786         context_invalidate_state(context, STATE_FRAMEBUFFER);
5787
5788         LEAVE_GL();
5789
5790         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5791     }
5792     else if (location == SFLAG_INDRAWABLE)
5793     {
5794         TRACE("Copying depth texture to onscreen depth buffer.\n");
5795
5796         ENTER_GL();
5797
5798         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5799                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5800         surface_depth_blt(surface, context, surface->texture_name,
5801                 0, surface->pow2Height - h, w, h, surface->texture_target);
5802         checkGLcall("depth_blt");
5803
5804         context_invalidate_state(context, STATE_FRAMEBUFFER);
5805
5806         LEAVE_GL();
5807
5808         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5809     }
5810     else
5811     {
5812         ERR("Invalid location (%#x) specified.\n", location);
5813     }
5814
5815     surface->flags |= location;
5816     surface->ds_current_size.cx = surface->resource.width;
5817     surface->ds_current_size.cy = surface->resource.height;
5818 }
5819
5820 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5821 {
5822     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5823     struct wined3d_surface *overlay;
5824
5825     TRACE("surface %p, location %s, persistent %#x.\n",
5826             surface, debug_surflocation(location), persistent);
5827
5828     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5829             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5830             && (location & SFLAG_INDRAWABLE))
5831         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5832
5833     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5834             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5835         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5836
5837     if (persistent)
5838     {
5839         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5840                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5841         {
5842             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5843             {
5844                 TRACE("Passing to container.\n");
5845                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5846             }
5847         }
5848         surface->flags &= ~SFLAG_LOCATIONS;
5849         surface->flags |= location;
5850
5851         /* Redraw emulated overlays, if any */
5852         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5853         {
5854             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5855             {
5856                 surface_draw_overlay(overlay);
5857             }
5858         }
5859     }
5860     else
5861     {
5862         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5863         {
5864             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5865             {
5866                 TRACE("Passing to container\n");
5867                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5868             }
5869         }
5870         surface->flags &= ~location;
5871     }
5872
5873     if (!(surface->flags & SFLAG_LOCATIONS))
5874     {
5875         ERR("Surface %p does not have any up to date location.\n", surface);
5876     }
5877 }
5878
5879 static DWORD resource_access_from_location(DWORD location)
5880 {
5881     switch (location)
5882     {
5883         case SFLAG_INSYSMEM:
5884             return WINED3D_RESOURCE_ACCESS_CPU;
5885
5886         case SFLAG_INDRAWABLE:
5887         case SFLAG_INSRGBTEX:
5888         case SFLAG_INTEXTURE:
5889         case SFLAG_INRB_MULTISAMPLE:
5890         case SFLAG_INRB_RESOLVED:
5891             return WINED3D_RESOURCE_ACCESS_GPU;
5892
5893         default:
5894             FIXME("Unhandled location %#x.\n", location);
5895             return 0;
5896     }
5897 }
5898
5899 static void surface_load_sysmem(struct wined3d_surface *surface,
5900         const struct wined3d_gl_info *gl_info, const RECT *rect)
5901 {
5902     surface_prepare_system_memory(surface);
5903
5904     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5905         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5906
5907     /* Download the surface to system memory. */
5908     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5909     {
5910         struct wined3d_device *device = surface->resource.device;
5911         struct wined3d_context *context;
5912
5913         /* TODO: Use already acquired context when possible. */
5914         context = context_acquire(device, NULL);
5915
5916         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5917         surface_download_data(surface, gl_info);
5918
5919         context_release(context);
5920
5921         return;
5922     }
5923
5924     if (surface->flags & SFLAG_INDRAWABLE)
5925     {
5926         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5927                 wined3d_surface_get_pitch(surface));
5928         return;
5929     }
5930
5931     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5932             surface, surface->flags & SFLAG_LOCATIONS);
5933 }
5934
5935 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5936         const struct wined3d_gl_info *gl_info, const RECT *rect)
5937 {
5938     struct wined3d_device *device = surface->resource.device;
5939     enum wined3d_conversion_type convert;
5940     struct wined3d_format format;
5941     UINT byte_count;
5942     BYTE *mem;
5943
5944     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5945     {
5946         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5947         return WINED3DERR_INVALIDCALL;
5948     }
5949
5950     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5951         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5952
5953     if (surface->flags & SFLAG_INTEXTURE)
5954     {
5955         RECT r;
5956
5957         surface_get_rect(surface, rect, &r);
5958         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5959
5960         return WINED3D_OK;
5961     }
5962
5963     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5964     {
5965         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5966          * path through sysmem. */
5967         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5968     }
5969
5970     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5971
5972     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5973      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5974      * called. */
5975     if ((convert != WINED3D_CT_NONE) && (surface->flags & SFLAG_PBO))
5976     {
5977         struct wined3d_context *context;
5978
5979         TRACE("Removing the pbo attached to surface %p.\n", surface);
5980
5981         /* TODO: Use already acquired context when possible. */
5982         context = context_acquire(device, NULL);
5983
5984         surface_remove_pbo(surface, gl_info);
5985
5986         context_release(context);
5987     }
5988
5989     if ((convert != WINED3D_CT_NONE) && surface->resource.allocatedMemory)
5990     {
5991         UINT height = surface->resource.height;
5992         UINT width = surface->resource.width;
5993         UINT src_pitch, dst_pitch;
5994
5995         byte_count = format.conv_byte_count;
5996         src_pitch = wined3d_surface_get_pitch(surface);
5997
5998         /* Stick to the alignment for the converted surface too, makes it
5999          * easier to load the surface. */
6000         dst_pitch = width * byte_count;
6001         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6002
6003         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6004         {
6005             ERR("Out of memory (%u).\n", dst_pitch * height);
6006             return E_OUTOFMEMORY;
6007         }
6008
6009         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
6010                 src_pitch, width, height, dst_pitch, convert, surface);
6011
6012         surface->flags |= SFLAG_CONVERTED;
6013     }
6014     else
6015     {
6016         surface->flags &= ~SFLAG_CONVERTED;
6017         mem = surface->resource.allocatedMemory;
6018         byte_count = format.byte_count;
6019     }
6020
6021     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
6022
6023     /* Don't delete PBO memory. */
6024     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6025         HeapFree(GetProcessHeap(), 0, mem);
6026
6027     return WINED3D_OK;
6028 }
6029
6030 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6031         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6032 {
6033     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6034     struct wined3d_device *device = surface->resource.device;
6035     enum wined3d_conversion_type convert;
6036     struct wined3d_context *context;
6037     UINT width, src_pitch, dst_pitch;
6038     struct wined3d_bo_address data;
6039     struct wined3d_format format;
6040     POINT dst_point = {0, 0};
6041     BYTE *mem;
6042
6043     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6044             && surface_is_offscreen(surface)
6045             && (surface->flags & SFLAG_INDRAWABLE))
6046     {
6047         surface_load_fb_texture(surface, srgb);
6048
6049         return WINED3D_OK;
6050     }
6051
6052     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6053             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6054             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6055                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6056                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6057     {
6058         if (srgb)
6059             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6060                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6061         else
6062             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6063                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6064
6065         return WINED3D_OK;
6066     }
6067
6068     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6069             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6070             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6071                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6072                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6073     {
6074         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6075         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
6076         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6077
6078         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6079                 &rect, surface, dst_location, &rect);
6080
6081         return WINED3D_OK;
6082     }
6083
6084     /* Upload from system memory */
6085
6086     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6087             TRUE /* We will use textures */, &format, &convert);
6088
6089     if (srgb)
6090     {
6091         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6092         {
6093             /* Performance warning... */
6094             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6095             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6096         }
6097     }
6098     else
6099     {
6100         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6101         {
6102             /* Performance warning... */
6103             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6104             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6105         }
6106     }
6107
6108     if (!(surface->flags & SFLAG_INSYSMEM))
6109     {
6110         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6111         /* Lets hope we get it from somewhere... */
6112         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6113     }
6114
6115     /* TODO: Use already acquired context when possible. */
6116     context = context_acquire(device, NULL);
6117
6118     surface_prepare_texture(surface, context, srgb);
6119     surface_bind_and_dirtify(surface, context, srgb);
6120
6121     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6122     {
6123         surface->flags |= SFLAG_GLCKEY;
6124         surface->gl_color_key = surface->src_blt_color_key;
6125     }
6126     else surface->flags &= ~SFLAG_GLCKEY;
6127
6128     width = surface->resource.width;
6129     src_pitch = wined3d_surface_get_pitch(surface);
6130
6131     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6132      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6133      * called. */
6134     if ((convert != WINED3D_CT_NONE || format.convert) && (surface->flags & SFLAG_PBO))
6135     {
6136         TRACE("Removing the pbo attached to surface %p.\n", surface);
6137         surface_remove_pbo(surface, gl_info);
6138     }
6139
6140     if (format.convert)
6141     {
6142         /* This code is entered for texture formats which need a fixup. */
6143         UINT height = surface->resource.height;
6144
6145         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6146         dst_pitch = width * format.conv_byte_count;
6147         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6148
6149         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6150         {
6151             ERR("Out of memory (%u).\n", dst_pitch * height);
6152             context_release(context);
6153             return E_OUTOFMEMORY;
6154         }
6155         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6156         format.byte_count = format.conv_byte_count;
6157         src_pitch = dst_pitch;
6158     }
6159     else if (convert != WINED3D_CT_NONE && surface->resource.allocatedMemory)
6160     {
6161         /* This code is only entered for color keying fixups */
6162         UINT height = surface->resource.height;
6163
6164         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6165         dst_pitch = width * format.conv_byte_count;
6166         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6167
6168         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6169         {
6170             ERR("Out of memory (%u).\n", dst_pitch * height);
6171             context_release(context);
6172             return E_OUTOFMEMORY;
6173         }
6174         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6175                 width, height, dst_pitch, convert, surface);
6176         format.byte_count = format.conv_byte_count;
6177         src_pitch = dst_pitch;
6178     }
6179     else
6180     {
6181         mem = surface->resource.allocatedMemory;
6182     }
6183
6184     data.buffer_object = surface->pbo;
6185     data.addr = mem;
6186     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6187
6188     context_release(context);
6189
6190     /* Don't delete PBO memory. */
6191     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6192         HeapFree(GetProcessHeap(), 0, mem);
6193
6194     return WINED3D_OK;
6195 }
6196
6197 static void surface_multisample_resolve(struct wined3d_surface *surface)
6198 {
6199     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6200
6201     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6202         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6203
6204     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6205             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6206 }
6207
6208 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6209 {
6210     struct wined3d_device *device = surface->resource.device;
6211     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6212     HRESULT hr;
6213
6214     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6215
6216     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6217     {
6218         if (location == SFLAG_INTEXTURE)
6219         {
6220             struct wined3d_context *context = context_acquire(device, NULL);
6221             surface_load_ds_location(surface, context, location);
6222             context_release(context);
6223             return WINED3D_OK;
6224         }
6225         else
6226         {
6227             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6228             return WINED3DERR_INVALIDCALL;
6229         }
6230     }
6231
6232     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6233         location = SFLAG_INTEXTURE;
6234
6235     if (surface->flags & location)
6236     {
6237         TRACE("Location already up to date.\n");
6238
6239         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6240                 && surface_need_pbo(surface, gl_info))
6241             surface_load_pbo(surface, gl_info);
6242
6243         return WINED3D_OK;
6244     }
6245
6246     if (WARN_ON(d3d_surface))
6247     {
6248         DWORD required_access = resource_access_from_location(location);
6249         if ((surface->resource.access_flags & required_access) != required_access)
6250             WARN("Operation requires %#x access, but surface only has %#x.\n",
6251                     required_access, surface->resource.access_flags);
6252     }
6253
6254     if (!(surface->flags & SFLAG_LOCATIONS))
6255     {
6256         ERR("Surface %p does not have any up to date location.\n", surface);
6257         surface->flags |= SFLAG_LOST;
6258         return WINED3DERR_DEVICELOST;
6259     }
6260
6261     switch (location)
6262     {
6263         case SFLAG_INSYSMEM:
6264             surface_load_sysmem(surface, gl_info, rect);
6265             break;
6266
6267         case SFLAG_INDRAWABLE:
6268             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6269                 return hr;
6270             break;
6271
6272         case SFLAG_INRB_RESOLVED:
6273             surface_multisample_resolve(surface);
6274             break;
6275
6276         case SFLAG_INTEXTURE:
6277         case SFLAG_INSRGBTEX:
6278             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6279                 return hr;
6280             break;
6281
6282         default:
6283             ERR("Don't know how to handle location %#x.\n", location);
6284             break;
6285     }
6286
6287     if (!rect)
6288     {
6289         surface->flags |= location;
6290
6291         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6292             surface_evict_sysmem(surface);
6293     }
6294
6295     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6296             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6297     {
6298         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6299     }
6300
6301     return WINED3D_OK;
6302 }
6303
6304 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6305 {
6306     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6307
6308     /* Not on a swapchain - must be offscreen */
6309     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6310
6311     /* The front buffer is always onscreen */
6312     if (surface == swapchain->front_buffer) return FALSE;
6313
6314     /* If the swapchain is rendered to an FBO, the backbuffer is
6315      * offscreen, otherwise onscreen */
6316     return swapchain->render_to_fbo;
6317 }
6318
6319 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); intentionally empty.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6322
6323 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6324 /* Context activation is done by the caller. */
6325 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6326 {
6327     BYTE table[256][4];
6328     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6329
6330     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6331
6332     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6333     ENTER_GL();
6334     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6335     LEAVE_GL();
6336 }
6337
6338 /* Context activation is done by the caller. */
6339 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6340 {
6341     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6342
6343     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6344      * else the surface is converted in software at upload time in LoadLocation.
6345      */
6346     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6347             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6348         ffp_blit_p8_upload_palette(surface, context->gl_info);
6349
6350     ENTER_GL();
6351     glEnable(surface->texture_target);
6352     checkGLcall("glEnable(surface->texture_target)");
6353     LEAVE_GL();
6354     return WINED3D_OK;
6355 }
6356
6357 /* Context activation is done by the caller. */
6358 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6359 {
6360     ENTER_GL();
6361     glDisable(GL_TEXTURE_2D);
6362     checkGLcall("glDisable(GL_TEXTURE_2D)");
6363     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6364     {
6365         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6366         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6367     }
6368     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6369     {
6370         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6371         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6372     }
6373     LEAVE_GL();
6374 }
6375
6376 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6377         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6378         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6379 {
6380     enum complex_fixup src_fixup;
6381
6382     switch (blit_op)
6383     {
6384         case WINED3D_BLIT_OP_COLOR_BLIT:
6385             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6386                 return FALSE;
6387
6388             src_fixup = get_complex_fixup(src_format->color_fixup);
6389             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6390             {
6391                 TRACE("Checking support for fixup:\n");
6392                 dump_color_fixup_desc(src_format->color_fixup);
6393             }
6394
6395             if (!is_identity_fixup(dst_format->color_fixup))
6396             {
6397                 TRACE("Destination fixups are not supported\n");
6398                 return FALSE;
6399             }
6400
6401             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6402             {
6403                 TRACE("P8 fixup supported\n");
6404                 return TRUE;
6405             }
6406
6407             /* We only support identity conversions. */
6408             if (is_identity_fixup(src_format->color_fixup))
6409             {
6410                 TRACE("[OK]\n");
6411                 return TRUE;
6412             }
6413
6414             TRACE("[FAILED]\n");
6415             return FALSE;
6416
6417         case WINED3D_BLIT_OP_COLOR_FILL:
6418             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6419                 return FALSE;
6420
6421             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6422             {
6423                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6424                     return FALSE;
6425             }
6426             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6427             {
6428                 TRACE("Color fill not supported\n");
6429                 return FALSE;
6430             }
6431
6432             /* FIXME: We should reject color fills on formats with fixups,
6433              * but this would break P8 color fills for example. */
6434
6435             return TRUE;
6436
6437         case WINED3D_BLIT_OP_DEPTH_FILL:
6438             return TRUE;
6439
6440         default:
6441             TRACE("Unsupported blit_op=%d\n", blit_op);
6442             return FALSE;
6443     }
6444 }
6445
6446 /* Do not call while under the GL lock. */
6447 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6448         const RECT *dst_rect, const struct wined3d_color *color)
6449 {
6450     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6451     struct wined3d_fb_state fb = {&dst_surface, NULL};
6452
6453     device_clear_render_targets(device, 1, &fb, 1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6454
6455     return WINED3D_OK;
6456 }
6457
6458 /* Do not call while under the GL lock. */
6459 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6460         struct wined3d_surface *surface, const RECT *rect, float depth)
6461 {
6462     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6463     struct wined3d_fb_state fb = {NULL, surface};
6464
6465     device_clear_render_targets(device, 0, &fb, 1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6466
6467     return WINED3D_OK;
6468 }
6469
6470 const struct blit_shader ffp_blit =  {
6471     ffp_blit_alloc,
6472     ffp_blit_free,
6473     ffp_blit_set,
6474     ffp_blit_unset,
6475     ffp_blit_supported,
6476     ffp_blit_color_fill,
6477     ffp_blit_depth_fill,
6478 };
6479
6480 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6481 {
6482     return WINED3D_OK;
6483 }
6484
/* Counterpart of cpu_blit_alloc(); intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device) { }
6489
6490 /* Context activation is done by the caller. */
6491 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6492 {
6493     return WINED3D_OK;
6494 }
6495
/* Counterpart of cpu_blit_set(); intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info) { }
6500
6501 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6502         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6503         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6504 {
6505     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6506     {
6507         return TRUE;
6508     }
6509
6510     return FALSE;
6511 }
6512
6513 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6514         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6515         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6516 {
6517     UINT row_block_count;
6518     const BYTE *src_row;
6519     BYTE *dst_row;
6520     UINT x, y;
6521
6522     src_row = src_data;
6523     dst_row = dst_data;
6524
6525     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6526
6527     if (!flags)
6528     {
6529         for (y = 0; y < update_h; y += format->block_height)
6530         {
6531             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6532             src_row += src_pitch;
6533             dst_row += dst_pitch;
6534         }
6535
6536         return WINED3D_OK;
6537     }
6538
6539     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6540     {
6541         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6542
6543         switch (format->id)
6544         {
6545             case WINED3DFMT_DXT1:
6546                 for (y = 0; y < update_h; y += format->block_height)
6547                 {
6548                     struct block
6549                     {
6550                         WORD color[2];
6551                         BYTE control_row[4];
6552                     };
6553
6554                     const struct block *s = (const struct block *)src_row;
6555                     struct block *d = (struct block *)dst_row;
6556
6557                     for (x = 0; x < row_block_count; ++x)
6558                     {
6559                         d[x].color[0] = s[x].color[0];
6560                         d[x].color[1] = s[x].color[1];
6561                         d[x].control_row[0] = s[x].control_row[3];
6562                         d[x].control_row[1] = s[x].control_row[2];
6563                         d[x].control_row[2] = s[x].control_row[1];
6564                         d[x].control_row[3] = s[x].control_row[0];
6565                     }
6566                     src_row -= src_pitch;
6567                     dst_row += dst_pitch;
6568                 }
6569                 return WINED3D_OK;
6570
6571             case WINED3DFMT_DXT3:
6572                 for (y = 0; y < update_h; y += format->block_height)
6573                 {
6574                     struct block
6575                     {
6576                         WORD alpha_row[4];
6577                         WORD color[2];
6578                         BYTE control_row[4];
6579                     };
6580
6581                     const struct block *s = (const struct block *)src_row;
6582                     struct block *d = (struct block *)dst_row;
6583
6584                     for (x = 0; x < row_block_count; ++x)
6585                     {
6586                         d[x].alpha_row[0] = s[x].alpha_row[3];
6587                         d[x].alpha_row[1] = s[x].alpha_row[2];
6588                         d[x].alpha_row[2] = s[x].alpha_row[1];
6589                         d[x].alpha_row[3] = s[x].alpha_row[0];
6590                         d[x].color[0] = s[x].color[0];
6591                         d[x].color[1] = s[x].color[1];
6592                         d[x].control_row[0] = s[x].control_row[3];
6593                         d[x].control_row[1] = s[x].control_row[2];
6594                         d[x].control_row[2] = s[x].control_row[1];
6595                         d[x].control_row[3] = s[x].control_row[0];
6596                     }
6597                     src_row -= src_pitch;
6598                     dst_row += dst_pitch;
6599                 }
6600                 return WINED3D_OK;
6601
6602             default:
6603                 FIXME("Compressed flip not implemented for format %s.\n",
6604                         debug_d3dformat(format->id));
6605                 return E_NOTIMPL;
6606         }
6607     }
6608
6609     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6610             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6611
6612     return E_NOTIMPL;
6613 }
6614
6615 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6616         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6617         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6618 {
6619     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6620     const struct wined3d_format *src_format, *dst_format;
6621     struct wined3d_surface *orig_src = src_surface;
6622     struct wined3d_map_desc dst_map, src_map;
6623     const BYTE *sbase = NULL;
6624     HRESULT hr = WINED3D_OK;
6625     const BYTE *sbuf;
6626     BYTE *dbuf;
6627     int x, y;
6628
6629     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6630             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6631             flags, fx, debug_d3dtexturefiltertype(filter));
6632
6633     if (src_surface == dst_surface)
6634     {
6635         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6636         src_map = dst_map;
6637         src_format = dst_surface->resource.format;
6638         dst_format = src_format;
6639     }
6640     else
6641     {
6642         dst_format = dst_surface->resource.format;
6643         if (src_surface)
6644         {
6645             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6646             {
6647                 src_surface = surface_convert_format(src_surface, dst_format->id);
6648                 if (!src_surface)
6649                 {
6650                     /* The conv function writes a FIXME */
6651                     WARN("Cannot convert source surface format to dest format.\n");
6652                     goto release;
6653                 }
6654             }
6655             wined3d_surface_map(src_surface, &src_map, NULL, WINED3D_MAP_READONLY);
6656             src_format = src_surface->resource.format;
6657         }
6658         else
6659         {
6660             src_format = dst_format;
6661         }
6662
6663         wined3d_surface_map(dst_surface, &dst_map, dst_rect, 0);
6664     }
6665
6666     bpp = dst_surface->resource.format->byte_count;
6667     srcheight = src_rect->bottom - src_rect->top;
6668     srcwidth = src_rect->right - src_rect->left;
6669     dstheight = dst_rect->bottom - dst_rect->top;
6670     dstwidth = dst_rect->right - dst_rect->left;
6671     width = (dst_rect->right - dst_rect->left) * bpp;
6672
6673     if (src_surface)
6674         sbase = (BYTE *)src_map.data
6675                 + ((src_rect->top / src_format->block_height) * src_map.row_pitch)
6676                 + ((src_rect->left / src_format->block_width) * src_format->block_byte_count);
6677     if (src_surface != dst_surface)
6678         dbuf = dst_map.data;
6679     else
6680         dbuf = (BYTE *)dst_map.data
6681                 + ((dst_rect->top / dst_format->block_height) * dst_map.row_pitch)
6682                 + ((dst_rect->left / dst_format->block_width) * dst_format->block_byte_count);
6683
6684     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6685     {
6686         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6687
6688         if (src_surface == dst_surface)
6689         {
6690             FIXME("Only plain blits supported on compressed surfaces.\n");
6691             hr = E_NOTIMPL;
6692             goto release;
6693         }
6694
6695         if (srcheight != dstheight || srcwidth != dstwidth)
6696         {
6697             WARN("Stretching not supported on compressed surfaces.\n");
6698             hr = WINED3DERR_INVALIDCALL;
6699             goto release;
6700         }
6701
6702         if (!surface_check_block_align(src_surface, src_rect))
6703         {
6704             WARN("Source rectangle not block-aligned.\n");
6705             hr = WINED3DERR_INVALIDCALL;
6706             goto release;
6707         }
6708
6709         if (!surface_check_block_align(dst_surface, dst_rect))
6710         {
6711             WARN("Destination rectangle not block-aligned.\n");
6712             hr = WINED3DERR_INVALIDCALL;
6713             goto release;
6714         }
6715
6716         hr = surface_cpu_blt_compressed(sbase, dbuf,
6717                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6718                 src_format, flags, fx);
6719         goto release;
6720     }
6721
6722     /* First, all the 'source-less' blits */
6723     if (flags & WINEDDBLT_COLORFILL)
6724     {
6725         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6726         flags &= ~WINEDDBLT_COLORFILL;
6727     }
6728
6729     if (flags & WINEDDBLT_DEPTHFILL)
6730     {
6731         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6732     }
6733     if (flags & WINEDDBLT_ROP)
6734     {
6735         /* Catch some degenerate cases here. */
6736         switch (fx->dwROP)
6737         {
6738             case BLACKNESS:
6739                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6740                 break;
6741             case 0xAA0029: /* No-op */
6742                 break;
6743             case WHITENESS:
6744                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6745                 break;
6746             case SRCCOPY: /* Well, we do that below? */
6747                 break;
6748             default:
6749                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6750                 goto error;
6751         }
6752         flags &= ~WINEDDBLT_ROP;
6753     }
6754     if (flags & WINEDDBLT_DDROPS)
6755     {
6756         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6757     }
6758     /* Now the 'with source' blits. */
6759     if (src_surface)
6760     {
6761         int sx, xinc, sy, yinc;
6762
6763         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6764             goto release;
6765
6766         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6767                 && (srcwidth != dstwidth || srcheight != dstheight))
6768         {
6769             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6770             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6771         }
6772
6773         xinc = (srcwidth << 16) / dstwidth;
6774         yinc = (srcheight << 16) / dstheight;
6775
6776         if (!flags)
6777         {
6778             /* No effects, we can cheat here. */
6779             if (dstwidth == srcwidth)
6780             {
6781                 if (dstheight == srcheight)
6782                 {
6783                     /* No stretching in either direction. This needs to be as
6784                      * fast as possible. */
6785                     sbuf = sbase;
6786
6787                     /* Check for overlapping surfaces. */
6788                     if (src_surface != dst_surface || dst_rect->top < src_rect->top
6789                             || dst_rect->right <= src_rect->left || src_rect->right <= dst_rect->left)
6790                     {
6791                         /* No overlap, or dst above src, so copy from top downwards. */
6792                         for (y = 0; y < dstheight; ++y)
6793                         {
6794                             memcpy(dbuf, sbuf, width);
6795                             sbuf += src_map.row_pitch;
6796                             dbuf += dst_map.row_pitch;
6797                         }
6798                     }
6799                     else if (dst_rect->top > src_rect->top)
6800                     {
6801                         /* Copy from bottom upwards. */
6802                         sbuf += src_map.row_pitch * dstheight;
6803                         dbuf += dst_map.row_pitch * dstheight;
6804                         for (y = 0; y < dstheight; ++y)
6805                         {
6806                             sbuf -= src_map.row_pitch;
6807                             dbuf -= dst_map.row_pitch;
6808                             memcpy(dbuf, sbuf, width);
6809                         }
6810                     }
6811                     else
6812                     {
6813                         /* Src and dst overlapping on the same line, use memmove. */
6814                         for (y = 0; y < dstheight; ++y)
6815                         {
6816                             memmove(dbuf, sbuf, width);
6817                             sbuf += src_map.row_pitch;
6818                             dbuf += dst_map.row_pitch;
6819                         }
6820                     }
6821                 }
6822                 else
6823                 {
6824                     /* Stretching in y direction only. */
6825                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6826                     {
6827                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6828                         memcpy(dbuf, sbuf, width);
6829                         dbuf += dst_map.row_pitch;
6830                     }
6831                 }
6832             }
6833             else
6834             {
6835                 /* Stretching in X direction. */
6836                 int last_sy = -1;
6837                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6838                 {
6839                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6840
6841                     if ((sy >> 16) == (last_sy >> 16))
6842                     {
6843                         /* This source row is the same as last source row -
6844                          * Copy the already stretched row. */
6845                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6846                     }
6847                     else
6848                     {
6849 #define STRETCH_ROW(type) \
6850 do { \
6851     const type *s = (const type *)sbuf; \
6852     type *d = (type *)dbuf; \
6853     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6854         d[x] = s[sx >> 16]; \
6855 } while(0)
6856
6857                         switch(bpp)
6858                         {
6859                             case 1:
6860                                 STRETCH_ROW(BYTE);
6861                                 break;
6862                             case 2:
6863                                 STRETCH_ROW(WORD);
6864                                 break;
6865                             case 4:
6866                                 STRETCH_ROW(DWORD);
6867                                 break;
6868                             case 3:
6869                             {
6870                                 const BYTE *s;
6871                                 BYTE *d = dbuf;
6872                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6873                                 {
6874                                     DWORD pixel;
6875
6876                                     s = sbuf + 3 * (sx >> 16);
6877                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6878                                     d[0] = (pixel      ) & 0xff;
6879                                     d[1] = (pixel >>  8) & 0xff;
6880                                     d[2] = (pixel >> 16) & 0xff;
6881                                     d += 3;
6882                                 }
6883                                 break;
6884                             }
6885                             default:
6886                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6887                                 hr = WINED3DERR_NOTAVAILABLE;
6888                                 goto error;
6889                         }
6890 #undef STRETCH_ROW
6891                     }
6892                     dbuf += dst_map.row_pitch;
6893                     last_sy = sy;
6894                 }
6895             }
6896         }
6897         else
6898         {
6899             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6900             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6901             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6902             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6903             {
6904                 /* The color keying flags are checked for correctness in ddraw */
6905                 if (flags & WINEDDBLT_KEYSRC)
6906                 {
6907                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6908                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6909                 }
6910                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6911                 {
6912                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6913                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6914                 }
6915
6916                 if (flags & WINEDDBLT_KEYDEST)
6917                 {
6918                     /* Destination color keys are taken from the source surface! */
6919                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6920                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6921                 }
6922                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6923                 {
6924                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6925                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6926                 }
6927
6928                 if (bpp == 1)
6929                 {
6930                     keymask = 0xff;
6931                 }
6932                 else
6933                 {
6934                     keymask = src_format->red_mask
6935                             | src_format->green_mask
6936                             | src_format->blue_mask;
6937                 }
6938                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6939             }
6940
6941             if (flags & WINEDDBLT_DDFX)
6942             {
6943                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6944                 LONG tmpxy;
6945                 dTopLeft     = dbuf;
6946                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6947                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6948                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6949
6950                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6951                 {
6952                     /* I don't think we need to do anything about this flag */
6953                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6954                 }
6955                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6956                 {
6957                     tmp          = dTopRight;
6958                     dTopRight    = dTopLeft;
6959                     dTopLeft     = tmp;
6960                     tmp          = dBottomRight;
6961                     dBottomRight = dBottomLeft;
6962                     dBottomLeft  = tmp;
6963                     dstxinc = dstxinc * -1;
6964                 }
6965                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6966                 {
6967                     tmp          = dTopLeft;
6968                     dTopLeft     = dBottomLeft;
6969                     dBottomLeft  = tmp;
6970                     tmp          = dTopRight;
6971                     dTopRight    = dBottomRight;
6972                     dBottomRight = tmp;
6973                     dstyinc = dstyinc * -1;
6974                 }
6975                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6976                 {
6977                     /* I don't think we need to do anything about this flag */
6978                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6979                 }
6980                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6981                 {
6982                     tmp          = dBottomRight;
6983                     dBottomRight = dTopLeft;
6984                     dTopLeft     = tmp;
6985                     tmp          = dBottomLeft;
6986                     dBottomLeft  = dTopRight;
6987                     dTopRight    = tmp;
6988                     dstxinc = dstxinc * -1;
6989                     dstyinc = dstyinc * -1;
6990                 }
6991                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6992                 {
6993                     tmp          = dTopLeft;
6994                     dTopLeft     = dBottomLeft;
6995                     dBottomLeft  = dBottomRight;
6996                     dBottomRight = dTopRight;
6997                     dTopRight    = tmp;
6998                     tmpxy   = dstxinc;
6999                     dstxinc = dstyinc;
7000                     dstyinc = tmpxy;
7001                     dstxinc = dstxinc * -1;
7002                 }
7003                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
7004                 {
7005                     tmp          = dTopLeft;
7006                     dTopLeft     = dTopRight;
7007                     dTopRight    = dBottomRight;
7008                     dBottomRight = dBottomLeft;
7009                     dBottomLeft  = tmp;
7010                     tmpxy   = dstxinc;
7011                     dstxinc = dstyinc;
7012                     dstyinc = tmpxy;
7013                     dstyinc = dstyinc * -1;
7014                 }
7015                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
7016                 {
7017                     /* I don't think we need to do anything about this flag */
7018                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
7019                 }
7020                 dbuf = dTopLeft;
7021                 flags &= ~(WINEDDBLT_DDFX);
7022             }
7023
7024 #define COPY_COLORKEY_FX(type) \
7025 do { \
7026     const type *s; \
7027     type *d = (type *)dbuf, *dx, tmp; \
7028     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7029     { \
7030         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7031         dx = d; \
7032         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7033         { \
7034             tmp = s[sx >> 16]; \
7035             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7036                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7037             { \
7038                 dx[0] = tmp; \
7039             } \
7040             dx = (type *)(((BYTE *)dx) + dstxinc); \
7041         } \
7042         d = (type *)(((BYTE *)d) + dstyinc); \
7043     } \
7044 } while(0)
7045
7046             switch (bpp)
7047             {
7048                 case 1:
7049                     COPY_COLORKEY_FX(BYTE);
7050                     break;
7051                 case 2:
7052                     COPY_COLORKEY_FX(WORD);
7053                     break;
7054                 case 4:
7055                     COPY_COLORKEY_FX(DWORD);
7056                     break;
7057                 case 3:
7058                 {
7059                     const BYTE *s;
7060                     BYTE *d = dbuf, *dx;
7061                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7062                     {
7063                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7064                         dx = d;
7065                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7066                         {
7067                             DWORD pixel, dpixel = 0;
7068                             s = sbuf + 3 * (sx>>16);
7069                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7070                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7071                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7072                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7073                             {
7074                                 dx[0] = (pixel      ) & 0xff;
7075                                 dx[1] = (pixel >>  8) & 0xff;
7076                                 dx[2] = (pixel >> 16) & 0xff;
7077                             }
7078                             dx += dstxinc;
7079                         }
7080                         d += dstyinc;
7081                     }
7082                     break;
7083                 }
7084                 default:
7085                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7086                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7087                     hr = WINED3DERR_NOTAVAILABLE;
7088                     goto error;
7089 #undef COPY_COLORKEY_FX
7090             }
7091         }
7092     }
7093
7094 error:
7095     if (flags && FIXME_ON(d3d_surface))
7096     {
7097         FIXME("\tUnsupported flags: %#x.\n", flags);
7098     }
7099
7100 release:
7101     wined3d_surface_unmap(dst_surface);
7102     if (src_surface && src_surface != dst_surface)
7103         wined3d_surface_unmap(src_surface);
7104     /* Release the converted surface, if any. */
7105     if (src_surface && src_surface != orig_src)
7106         wined3d_surface_decref(src_surface);
7107
7108     return hr;
7109 }
7110
7111 /* Do not call while under the GL lock. */
7112 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7113         const RECT *dst_rect, const struct wined3d_color *color)
7114 {
7115     static const RECT src_rect;
7116     WINEDDBLTFX BltFx;
7117
7118     memset(&BltFx, 0, sizeof(BltFx));
7119     BltFx.dwSize = sizeof(BltFx);
7120     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7121     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7122             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7123 }
7124
7125 /* Do not call while under the GL lock. */
7126 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7127         struct wined3d_surface *surface, const RECT *rect, float depth)
7128 {
7129     FIXME("Depth filling not implemented by cpu_blit.\n");
7130     return WINED3DERR_INVALIDCALL;
7131 }
7132
7133 const struct blit_shader cpu_blit =  {
7134     cpu_blit_alloc,
7135     cpu_blit_free,
7136     cpu_blit_set,
7137     cpu_blit_unset,
7138     cpu_blit_supported,
7139     cpu_blit_color_fill,
7140     cpu_blit_depth_fill,
7141 };
7142
7143 static HRESULT surface_init(struct wined3d_surface *surface, enum wined3d_surface_type surface_type, UINT alignment,
7144         UINT width, UINT height, UINT level, enum wined3d_multisample_type multisample_type,
7145         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7146         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7147 {
7148     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7149     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7150     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7151     unsigned int resource_size;
7152     HRESULT hr;
7153
7154     if (multisample_quality > 0)
7155     {
7156         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7157         multisample_quality = 0;
7158     }
7159
7160     /* Quick lockable sanity check.
7161      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7162      * this function is too deep to need to care about things like this.
7163      * Levels need to be checked too, since they all affect what can be done. */
7164     switch (pool)
7165     {
7166         case WINED3D_POOL_SCRATCH:
7167             if (!lockable)
7168             {
7169                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7170                         "which are mutually exclusive, setting lockable to TRUE.\n");
7171                 lockable = TRUE;
7172             }
7173             break;
7174
7175         case WINED3D_POOL_SYSTEM_MEM:
7176             if (!lockable)
7177                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7178             break;
7179
7180         case WINED3D_POOL_MANAGED:
7181             if (usage & WINED3DUSAGE_DYNAMIC)
7182                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7183             break;
7184
7185         case WINED3D_POOL_DEFAULT:
7186             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7187                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7188             break;
7189
7190         default:
7191             FIXME("Unknown pool %#x.\n", pool);
7192             break;
7193     };
7194
7195     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7196         FIXME("Trying to create a render target that isn't in the default pool.\n");
7197
7198     /* FIXME: Check that the format is supported by the device. */
7199
7200     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7201     if (!resource_size)
7202         return WINED3DERR_INVALIDCALL;
7203
7204     surface->surface_type = surface_type;
7205
7206     switch (surface_type)
7207     {
7208         case WINED3D_SURFACE_TYPE_OPENGL:
7209             surface->surface_ops = &surface_ops;
7210             break;
7211
7212         case WINED3D_SURFACE_TYPE_GDI:
7213             surface->surface_ops = &gdi_surface_ops;
7214             break;
7215
7216         default:
7217             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7218             return WINED3DERR_INVALIDCALL;
7219     }
7220
7221     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7222             multisample_type, multisample_quality, usage, pool, width, height, 1,
7223             resource_size, parent, parent_ops, &surface_resource_ops);
7224     if (FAILED(hr))
7225     {
7226         WARN("Failed to initialize resource, returning %#x.\n", hr);
7227         return hr;
7228     }
7229
7230     /* "Standalone" surface. */
7231     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7232
7233     surface->texture_level = level;
7234     list_init(&surface->overlays);
7235
7236     /* Flags */
7237     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7238     if (flags & WINED3D_SURFACE_DISCARD)
7239         surface->flags |= SFLAG_DISCARD;
7240     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7241         surface->flags |= SFLAG_PIN_SYSMEM;
7242     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7243         surface->flags |= SFLAG_LOCKABLE;
7244     /* I'm not sure if this qualifies as a hack or as an optimization. It
7245      * seems reasonable to assume that lockable render targets will get
7246      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7247      * creation. However, the other reason we want to do this is that several
7248      * ddraw applications access surface memory while the surface isn't
7249      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7250      * future locks prevents these from crashing. */
7251     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7252         surface->flags |= SFLAG_DYNLOCK;
7253
7254     /* Mark the texture as dirty so that it gets loaded first time around. */
7255     surface_add_dirty_rect(surface, NULL);
7256     list_init(&surface->renderbuffers);
7257
7258     TRACE("surface %p, memory %p, size %u\n",
7259             surface, surface->resource.allocatedMemory, surface->resource.size);
7260
7261     /* Call the private setup routine */
7262     hr = surface->surface_ops->surface_private_setup(surface);
7263     if (FAILED(hr))
7264     {
7265         ERR("Private setup failed, returning %#x\n", hr);
7266         surface_cleanup(surface);
7267         return hr;
7268     }
7269
7270     /* Similar to lockable rendertargets above, creating the DIB section
7271      * during surface initialization prevents the sysmem pointer from changing
7272      * after a wined3d_surface_getdc() call. */
7273     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7274             && SUCCEEDED(surface_create_dib_section(surface)))
7275     {
7276         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7277         surface->resource.heapMemory = NULL;
7278         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7279     }
7280
7281     return hr;
7282 }
7283
7284 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7285         enum wined3d_format_id format_id, UINT level, DWORD usage, enum wined3d_pool pool,
7286         enum wined3d_multisample_type multisample_type, DWORD multisample_quality,
7287         enum wined3d_surface_type surface_type, DWORD flags, void *parent,
7288         const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7289 {
7290     struct wined3d_surface *object;
7291     HRESULT hr;
7292
7293     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7294             device, width, height, debug_d3dformat(format_id), level);
7295     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7296             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7297     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7298
7299     if (surface_type == WINED3D_SURFACE_TYPE_OPENGL && !device->adapter)
7300     {
7301         ERR("OpenGL surfaces are not available without OpenGL.\n");
7302         return WINED3DERR_NOTAVAILABLE;
7303     }
7304
7305     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7306     if (!object)
7307     {
7308         ERR("Failed to allocate surface memory.\n");
7309         return WINED3DERR_OUTOFVIDEOMEMORY;
7310     }
7311
7312     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7313             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7314     if (FAILED(hr))
7315     {
7316         WARN("Failed to initialize surface, returning %#x.\n", hr);
7317         HeapFree(GetProcessHeap(), 0, object);
7318         return hr;
7319     }
7320
7321     TRACE("Created surface %p.\n", object);
7322     *surface = object;
7323
7324     return hr;
7325 }