wined3d: Only unset the onscreen depth / stencil buffer once in wined3d_device_reset().
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         enum wined3d_texture_filter_type filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
164 struct blt_info
165 {
166     GLenum binding;
167     GLenum bind_target;
168     enum tex_types tex_type;
169     GLfloat coords[4][3];
170 };
171
/* Rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380
381     TRACE("surface %p.\n", surface);
382
383     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
384     {
385         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
386         return WINED3DERR_INVALIDCALL;
387     }
388
389     switch (format->byte_count)
390     {
391         case 2:
392         case 4:
393             /* Allocate extra space to store the RGB bit masks. */
394             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
395             break;
396
397         case 3:
398             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
399             break;
400
401         default:
402             /* Allocate extra space for a palette. */
403             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
404                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
405             break;
406     }
407
408     if (!b_info)
409         return E_OUTOFMEMORY;
410
411     /* Some applications access the surface in via DWORDs, and do not take
412      * the necessary care at the end of the surface. So we need at least
413      * 4 extra bytes at the end of the surface. Check against the page size,
414      * if the last page used for the surface has at least 4 spare bytes we're
415      * safe, otherwise add an extra line to the DIB section. */
416     GetSystemInfo(&sysInfo);
417     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
418     {
419         extraline = 1;
420         TRACE("Adding an extra line to the DIB section.\n");
421     }
422
423     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
424     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
425     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
426     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
427     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
428             * wined3d_surface_get_pitch(surface);
429     b_info->bmiHeader.biPlanes = 1;
430     b_info->bmiHeader.biBitCount = format->byte_count * 8;
431
432     b_info->bmiHeader.biXPelsPerMeter = 0;
433     b_info->bmiHeader.biYPelsPerMeter = 0;
434     b_info->bmiHeader.biClrUsed = 0;
435     b_info->bmiHeader.biClrImportant = 0;
436
437     /* Get the bit masks */
438     masks = (DWORD *)b_info->bmiColors;
439     switch (surface->resource.format->id)
440     {
441         case WINED3DFMT_B8G8R8_UNORM:
442             b_info->bmiHeader.biCompression = BI_RGB;
443             break;
444
445         case WINED3DFMT_B5G5R5X1_UNORM:
446         case WINED3DFMT_B5G5R5A1_UNORM:
447         case WINED3DFMT_B4G4R4A4_UNORM:
448         case WINED3DFMT_B4G4R4X4_UNORM:
449         case WINED3DFMT_B2G3R3_UNORM:
450         case WINED3DFMT_B2G3R3A8_UNORM:
451         case WINED3DFMT_R10G10B10A2_UNORM:
452         case WINED3DFMT_R8G8B8A8_UNORM:
453         case WINED3DFMT_R8G8B8X8_UNORM:
454         case WINED3DFMT_B10G10R10A2_UNORM:
455         case WINED3DFMT_B5G6R5_UNORM:
456         case WINED3DFMT_R16G16B16A16_UNORM:
457             b_info->bmiHeader.biCompression = BI_BITFIELDS;
458             masks[0] = format->red_mask;
459             masks[1] = format->green_mask;
460             masks[2] = format->blue_mask;
461             break;
462
463         default:
464             /* Don't know palette */
465             b_info->bmiHeader.biCompression = BI_RGB;
466             break;
467     }
468
469     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
470             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
471             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
472     surface->dib.DIBsection = CreateDIBSection(0, b_info, DIB_RGB_COLORS, &surface->dib.bitmap_data, 0, 0);
473
474     if (!surface->dib.DIBsection)
475     {
476         ERR("Failed to create DIB section.\n");
477         HeapFree(GetProcessHeap(), 0, b_info);
478         return HRESULT_FROM_WIN32(GetLastError());
479     }
480
481     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
482     /* Copy the existing surface to the dib section. */
483     if (surface->resource.allocatedMemory)
484     {
485         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
486                 surface->resource.height * wined3d_surface_get_pitch(surface));
487     }
488     else
489     {
490         /* This is to make maps read the GL texture although memory is allocated. */
491         surface->flags &= ~SFLAG_INSYSMEM;
492     }
493     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
494
495     HeapFree(GetProcessHeap(), 0, b_info);
496
497     /* Now allocate a DC. */
498     surface->hDC = CreateCompatibleDC(0);
499     SelectObject(surface->hDC, surface->dib.DIBsection);
500     TRACE("Using wined3d palette %p.\n", surface->palette);
501     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
502
503     surface->flags |= SFLAG_DIBSECTION;
504
505     return WINED3D_OK;
506 }
507
508 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
509 {
510     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
511         return FALSE;
512     if (!(surface->flags & SFLAG_DYNLOCK))
513         return FALSE;
514     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
515         return FALSE;
516     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
517         return FALSE;
518
519     return TRUE;
520 }
521
522 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
523 {
524     struct wined3d_context *context;
525     GLenum error;
526
527     context = context_acquire(surface->resource.device, NULL);
528     ENTER_GL();
529
530     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
531     error = glGetError();
532     if (!surface->pbo || error != GL_NO_ERROR)
533         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
534
535     TRACE("Binding PBO %u.\n", surface->pbo);
536
537     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
538     checkGLcall("glBindBufferARB");
539
540     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
541             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
542     checkGLcall("glBufferDataARB");
543
544     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
545     checkGLcall("glBindBufferARB");
546
547     /* We don't need the system memory anymore and we can't even use it for PBOs. */
548     if (!(surface->flags & SFLAG_CLIENT))
549     {
550         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
551         surface->resource.heapMemory = NULL;
552     }
553     surface->resource.allocatedMemory = NULL;
554     surface->flags |= SFLAG_PBO;
555     LEAVE_GL();
556     context_release(context);
557 }
558
559 static void surface_prepare_system_memory(struct wined3d_surface *surface)
560 {
561     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
562
563     TRACE("surface %p.\n", surface);
564
565     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
566         surface_load_pbo(surface, gl_info);
567     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
568     {
569         /* Whatever surface we have, make sure that there is memory allocated
570          * for the downloaded copy, or a PBO to map. */
571         if (!surface->resource.heapMemory)
572             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
573
574         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
575                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
576
577         if (surface->flags & SFLAG_INSYSMEM)
578             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
579     }
580 }
581
582 static void surface_evict_sysmem(struct wined3d_surface *surface)
583 {
584     if (surface->resource.map_count || (surface->flags & SFLAG_DONOTFREE))
585         return;
586
587     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
588     surface->resource.allocatedMemory = NULL;
589     surface->resource.heapMemory = NULL;
590     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
591 }
592
593 /* Context activation is done by the caller. */
594 static void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
595 {
596     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
597
598     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
599     {
600         struct wined3d_texture *texture = surface->container.u.texture;
601
602         TRACE("Passing to container (%p).\n", texture);
603         texture->texture_ops->texture_bind(texture, context, srgb);
604     }
605     else
606     {
607         if (surface->texture_level)
608         {
609             ERR("Standalone surface %p is non-zero texture level %u.\n",
610                     surface, surface->texture_level);
611         }
612
613         if (srgb)
614             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
615
616         ENTER_GL();
617
618         if (!surface->texture_name)
619         {
620             glGenTextures(1, &surface->texture_name);
621             checkGLcall("glGenTextures");
622
623             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
624
625             context_bind_texture(context, surface->texture_target, surface->texture_name);
626             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
627             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
628             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
629             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
630             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
631             checkGLcall("glTexParameteri");
632         }
633         else
634         {
635             context_bind_texture(context, surface->texture_target, surface->texture_name);
636         }
637
638         LEAVE_GL();
639     }
640 }
641
642 /* Context activation is done by the caller. */
643 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
644         struct wined3d_context *context, BOOL srgb)
645 {
646     struct wined3d_device *device = surface->resource.device;
647     DWORD active_sampler;
648
649     /* We don't need a specific texture unit, but after binding the texture
650      * the current unit is dirty. Read the unit back instead of switching to
651      * 0, this avoids messing around with the state manager's GL states. The
652      * current texture unit should always be a valid one.
653      *
654      * To be more specific, this is tricky because we can implicitly be
655      * called from sampler() in state.c. This means we can't touch anything
656      * other than whatever happens to be the currently active texture, or we
657      * would risk marking already applied sampler states dirty again. */
658     active_sampler = device->rev_tex_unit_map[context->active_texture];
659
660     if (active_sampler != WINED3D_UNMAPPED_STAGE)
661         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
662     surface_bind(surface, context, srgb);
663 }
664
665 static void surface_force_reload(struct wined3d_surface *surface)
666 {
667     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
668 }
669
670 static void surface_release_client_storage(struct wined3d_surface *surface)
671 {
672     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
673
674     ENTER_GL();
675     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
676     if (surface->texture_name)
677     {
678         surface_bind_and_dirtify(surface, context, FALSE);
679         glTexImage2D(surface->texture_target, surface->texture_level,
680                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
681     }
682     if (surface->texture_name_srgb)
683     {
684         surface_bind_and_dirtify(surface, context, TRUE);
685         glTexImage2D(surface->texture_target, surface->texture_level,
686                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
687     }
688     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
689     LEAVE_GL();
690
691     context_release(context);
692
693     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
694     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
695     surface_force_reload(surface);
696 }
697
698 static HRESULT surface_private_setup(struct wined3d_surface *surface)
699 {
700     /* TODO: Check against the maximum texture sizes supported by the video card. */
701     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
702     unsigned int pow2Width, pow2Height;
703
704     TRACE("surface %p.\n", surface);
705
706     surface->texture_name = 0;
707     surface->texture_target = GL_TEXTURE_2D;
708
709     /* Non-power2 support */
710     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
711     {
712         pow2Width = surface->resource.width;
713         pow2Height = surface->resource.height;
714     }
715     else
716     {
717         /* Find the nearest pow2 match */
718         pow2Width = pow2Height = 1;
719         while (pow2Width < surface->resource.width)
720             pow2Width <<= 1;
721         while (pow2Height < surface->resource.height)
722             pow2Height <<= 1;
723     }
724     surface->pow2Width = pow2Width;
725     surface->pow2Height = pow2Height;
726
727     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
728     {
729         /* TODO: Add support for non power two compressed textures. */
730         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
731         {
732             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
733                   surface, surface->resource.width, surface->resource.height);
734             return WINED3DERR_NOTAVAILABLE;
735         }
736     }
737
738     if (pow2Width != surface->resource.width
739             || pow2Height != surface->resource.height)
740     {
741         surface->flags |= SFLAG_NONPOW2;
742     }
743
744     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
745             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
746     {
747         /* One of three options:
748          * 1: Do the same as we do with NPOT and scale the texture, (any
749          *    texture ops would require the texture to be scaled which is
750          *    potentially slow)
751          * 2: Set the texture to the maximum size (bad idea).
752          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
753          * 4: Create the surface, but allow it to be used only for DirectDraw
754          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
755          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
756          *    the render target. */
757         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
758         {
759             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
760             return WINED3DERR_NOTAVAILABLE;
761         }
762
763         /* We should never use this surface in combination with OpenGL! */
764         TRACE("Creating an oversized surface: %ux%u.\n",
765                 surface->pow2Width, surface->pow2Height);
766     }
767     else
768     {
769         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
770          * and EXT_PALETTED_TEXTURE is used in combination with texture
771          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
772          * EXT_PALETTED_TEXTURE doesn't work in combination with
773          * ARB_TEXTURE_RECTANGLE. */
774         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
775                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
776                 && gl_info->supported[EXT_PALETTED_TEXTURE]
777                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
778         {
779             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
780             surface->pow2Width = surface->resource.width;
781             surface->pow2Height = surface->resource.height;
782             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
783         }
784     }
785
786     switch (wined3d_settings.offscreen_rendering_mode)
787     {
788         case ORM_FBO:
789             surface->get_drawable_size = get_drawable_size_fbo;
790             break;
791
792         case ORM_BACKBUFFER:
793             surface->get_drawable_size = get_drawable_size_backbuffer;
794             break;
795
796         default:
797             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
798             return WINED3DERR_INVALIDCALL;
799     }
800
801     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
802         surface->flags |= SFLAG_DISCARDED;
803
804     return WINED3D_OK;
805 }
806
807 static void surface_realize_palette(struct wined3d_surface *surface)
808 {
809     struct wined3d_palette *palette = surface->palette;
810
811     TRACE("surface %p.\n", surface);
812
813     if (!palette) return;
814
815     if (surface->resource.format->id == WINED3DFMT_P8_UINT
816             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
817     {
818         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
819         {
820             /* Make sure the texture is up to date. This call doesn't do
821              * anything if the texture is already up to date. */
822             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
823
824             /* We want to force a palette refresh, so mark the drawable as not being up to date */
825             if (!surface_is_offscreen(surface))
826                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
827         }
828         else
829         {
830             if (!(surface->flags & SFLAG_INSYSMEM))
831             {
832                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
833                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
834             }
835             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
836         }
837     }
838
839     if (surface->flags & SFLAG_DIBSECTION)
840     {
841         RGBQUAD col[256];
842         unsigned int i;
843
844         TRACE("Updating the DC's palette.\n");
845
846         for (i = 0; i < 256; ++i)
847         {
848             col[i].rgbRed   = palette->palents[i].peRed;
849             col[i].rgbGreen = palette->palents[i].peGreen;
850             col[i].rgbBlue  = palette->palents[i].peBlue;
851             col[i].rgbReserved = 0;
852         }
853         SetDIBColorTable(surface->hDC, 0, 256, col);
854     }
855
856     /* Propagate the changes to the drawable when we have a palette. */
857     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
858         surface_load_location(surface, surface->draw_binding, NULL);
859 }
860
861 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
862 {
863     HRESULT hr;
864
865     /* If there's no destination surface there is nothing to do. */
866     if (!surface->overlay_dest)
867         return WINED3D_OK;
868
869     /* Blt calls ModifyLocation on the dest surface, which in turn calls
870      * DrawOverlay to update the overlay. Prevent an endless recursion. */
871     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
872         return WINED3D_OK;
873
874     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
875     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
876             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
877     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
878
879     return hr;
880 }
881
882 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
883 {
884     struct wined3d_device *device = surface->resource.device;
885     const RECT *pass_rect = rect;
886
887     TRACE("surface %p, rect %s, flags %#x.\n",
888             surface, wine_dbgstr_rect(rect), flags);
889
890     if (flags & WINED3D_MAP_DISCARD)
891     {
892         TRACE("WINED3D_MAP_DISCARD flag passed, marking SYSMEM as up to date.\n");
893         surface_prepare_system_memory(surface);
894         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
895     }
896     else
897     {
898         /* surface_load_location() does not check if the rectangle specifies
899          * the full surface. Most callers don't need that, so do it here. */
900         if (rect && !rect->top && !rect->left
901                 && rect->right == surface->resource.width
902                 && rect->bottom == surface->resource.height)
903             pass_rect = NULL;
904         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
905     }
906
907     if (surface->flags & SFLAG_PBO)
908     {
909         const struct wined3d_gl_info *gl_info;
910         struct wined3d_context *context;
911
912         context = context_acquire(device, NULL);
913         gl_info = context->gl_info;
914
915         ENTER_GL();
916         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
917         checkGLcall("glBindBufferARB");
918
919         /* This shouldn't happen but could occur if some other function
920          * didn't handle the PBO properly. */
921         if (surface->resource.allocatedMemory)
922             ERR("The surface already has PBO memory allocated.\n");
923
924         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
925         checkGLcall("glMapBufferARB");
926
927         /* Make sure the PBO isn't set anymore in order not to break non-PBO
928          * calls. */
929         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
930         checkGLcall("glBindBufferARB");
931
932         LEAVE_GL();
933         context_release(context);
934     }
935
936     if (!(flags & (WINED3D_MAP_NO_DIRTY_UPDATE | WINED3D_MAP_READONLY)))
937     {
938         if (!rect)
939             surface_add_dirty_rect(surface, NULL);
940         else
941         {
942             struct wined3d_box b;
943
944             b.left = rect->left;
945             b.top = rect->top;
946             b.right = rect->right;
947             b.bottom = rect->bottom;
948             b.front = 0;
949             b.back = 1;
950             surface_add_dirty_rect(surface, &b);
951         }
952     }
953 }
954
955 static void surface_unmap(struct wined3d_surface *surface)
956 {
957     struct wined3d_device *device = surface->resource.device;
958     BOOL fullsurface;
959
960     TRACE("surface %p.\n", surface);
961
962     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
963
964     if (surface->flags & SFLAG_PBO)
965     {
966         const struct wined3d_gl_info *gl_info;
967         struct wined3d_context *context;
968
969         TRACE("Freeing PBO memory.\n");
970
971         context = context_acquire(device, NULL);
972         gl_info = context->gl_info;
973
974         ENTER_GL();
975         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
976         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
977         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
978         checkGLcall("glUnmapBufferARB");
979         LEAVE_GL();
980         context_release(context);
981
982         surface->resource.allocatedMemory = NULL;
983     }
984
985     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
986
987     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
988     {
989         TRACE("Not dirtified, nothing to do.\n");
990         goto done;
991     }
992
993     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
994             && surface->container.u.swapchain->front_buffer == surface)
995     {
996         if (!surface->dirtyRect.left && !surface->dirtyRect.top
997                 && surface->dirtyRect.right == surface->resource.width
998                 && surface->dirtyRect.bottom == surface->resource.height)
999         {
1000             fullsurface = TRUE;
1001         }
1002         else
1003         {
1004             /* TODO: Proper partial rectangle tracking. */
1005             fullsurface = FALSE;
1006             surface->flags |= SFLAG_INSYSMEM;
1007         }
1008
1009         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
1010
1011         /* Partial rectangle tracking is not commonly implemented, it is only
1012          * done for render targets. INSYSMEM was set before to tell
1013          * surface_load_location() where to read the rectangle from.
1014          * Indrawable is set because all modifications from the partial
1015          * sysmem copy are written back to the drawable, thus the surface is
1016          * merged again in the drawable. The sysmem copy is not fully up to
1017          * date because only a subrectangle was read in Map(). */
1018         if (!fullsurface)
1019         {
1020             surface_modify_location(surface, surface->draw_binding, TRUE);
1021             surface_evict_sysmem(surface);
1022         }
1023
1024         surface->dirtyRect.left = surface->resource.width;
1025         surface->dirtyRect.top = surface->resource.height;
1026         surface->dirtyRect.right = 0;
1027         surface->dirtyRect.bottom = 0;
1028     }
1029     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1030     {
1031         FIXME("Depth / stencil buffer locking is not implemented.\n");
1032     }
1033
1034 done:
1035     /* Overlays have to be redrawn manually after changes with the GL implementation */
1036     if (surface->overlay_dest)
1037         surface_draw_overlay(surface);
1038 }
1039
1040 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1041 {
1042     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1043         return FALSE;
1044     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1045         return FALSE;
1046     return TRUE;
1047 }
1048
1049 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1050         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1051 {
1052     const struct wined3d_gl_info *gl_info;
1053     struct wined3d_context *context;
1054     DWORD src_mask, dst_mask;
1055     GLbitfield gl_mask;
1056
1057     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1058             device, src_surface, wine_dbgstr_rect(src_rect),
1059             dst_surface, wine_dbgstr_rect(dst_rect));
1060
1061     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1062     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1063
1064     if (src_mask != dst_mask)
1065     {
1066         ERR("Incompatible formats %s and %s.\n",
1067                 debug_d3dformat(src_surface->resource.format->id),
1068                 debug_d3dformat(dst_surface->resource.format->id));
1069         return;
1070     }
1071
1072     if (!src_mask)
1073     {
1074         ERR("Not a depth / stencil format: %s.\n",
1075                 debug_d3dformat(src_surface->resource.format->id));
1076         return;
1077     }
1078
1079     gl_mask = 0;
1080     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1081         gl_mask |= GL_DEPTH_BUFFER_BIT;
1082     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1083         gl_mask |= GL_STENCIL_BUFFER_BIT;
1084
1085     /* Make sure the locations are up-to-date. Loading the destination
1086      * surface isn't required if the entire surface is overwritten. */
1087     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1088     if (!surface_is_full_rect(dst_surface, dst_rect))
1089         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1090
1091     context = context_acquire(device, NULL);
1092     if (!context->valid)
1093     {
1094         context_release(context);
1095         WARN("Invalid context, skipping blit.\n");
1096         return;
1097     }
1098
1099     gl_info = context->gl_info;
1100
1101     ENTER_GL();
1102
1103     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1104     glReadBuffer(GL_NONE);
1105     checkGLcall("glReadBuffer()");
1106     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1107
1108     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1109     context_set_draw_buffer(context, GL_NONE);
1110     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1111     context_invalidate_state(context, STATE_FRAMEBUFFER);
1112
1113     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1114     {
1115         glDepthMask(GL_TRUE);
1116         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1117     }
1118     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1119     {
1120         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1121         {
1122             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1123             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1124         }
1125         glStencilMask(~0U);
1126         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1127     }
1128
1129     glDisable(GL_SCISSOR_TEST);
1130     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1131
1132     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1133             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1134     checkGLcall("glBlitFramebuffer()");
1135
1136     LEAVE_GL();
1137
1138     if (wined3d_settings.strict_draw_ordering)
1139         wglFlush(); /* Flush to ensure ordering across contexts. */
1140
1141     context_release(context);
1142 }
1143
1144 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1145  * Depth / stencil is not supported. */
1146 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1147         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1148         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1149 {
1150     const struct wined3d_gl_info *gl_info;
1151     struct wined3d_context *context;
1152     RECT src_rect, dst_rect;
1153     GLenum gl_filter;
1154     GLenum buffer;
1155
1156     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1157     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1158             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1159     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1160             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1161
1162     src_rect = *src_rect_in;
1163     dst_rect = *dst_rect_in;
1164
1165     switch (filter)
1166     {
1167         case WINED3D_TEXF_LINEAR:
1168             gl_filter = GL_LINEAR;
1169             break;
1170
1171         default:
1172             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1173         case WINED3D_TEXF_NONE:
1174         case WINED3D_TEXF_POINT:
1175             gl_filter = GL_NEAREST;
1176             break;
1177     }
1178
1179     /* Resolve the source surface first if needed. */
1180     if (src_location == SFLAG_INRB_MULTISAMPLE
1181             && (src_surface->resource.format->id != dst_surface->resource.format->id
1182                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1183                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1184         src_location = SFLAG_INRB_RESOLVED;
1185
1186     /* Make sure the locations are up-to-date. Loading the destination
1187      * surface isn't required if the entire surface is overwritten. (And is
1188      * in fact harmful if we're being called by surface_load_location() with
1189      * the purpose of loading the destination surface.) */
1190     surface_load_location(src_surface, src_location, NULL);
1191     if (!surface_is_full_rect(dst_surface, &dst_rect))
1192         surface_load_location(dst_surface, dst_location, NULL);
1193
1194     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1195     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1196     else context = context_acquire(device, NULL);
1197
1198     if (!context->valid)
1199     {
1200         context_release(context);
1201         WARN("Invalid context, skipping blit.\n");
1202         return;
1203     }
1204
1205     gl_info = context->gl_info;
1206
1207     if (src_location == SFLAG_INDRAWABLE)
1208     {
1209         TRACE("Source surface %p is onscreen.\n", src_surface);
1210         buffer = surface_get_gl_buffer(src_surface);
1211         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1212     }
1213     else
1214     {
1215         TRACE("Source surface %p is offscreen.\n", src_surface);
1216         buffer = GL_COLOR_ATTACHMENT0;
1217     }
1218
1219     ENTER_GL();
1220     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1221     glReadBuffer(buffer);
1222     checkGLcall("glReadBuffer()");
1223     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1224     LEAVE_GL();
1225
1226     if (dst_location == SFLAG_INDRAWABLE)
1227     {
1228         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1229         buffer = surface_get_gl_buffer(dst_surface);
1230         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1231     }
1232     else
1233     {
1234         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1235         buffer = GL_COLOR_ATTACHMENT0;
1236     }
1237
1238     ENTER_GL();
1239     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1240     context_set_draw_buffer(context, buffer);
1241     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1242     context_invalidate_state(context, STATE_FRAMEBUFFER);
1243
1244     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1245     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1246     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1247     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1248     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1249
1250     glDisable(GL_SCISSOR_TEST);
1251     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1252
1253     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1254             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1255     checkGLcall("glBlitFramebuffer()");
1256
1257     LEAVE_GL();
1258
1259     if (wined3d_settings.strict_draw_ordering
1260             || (dst_location == SFLAG_INDRAWABLE
1261             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1262         wglFlush();
1263
1264     context_release(context);
1265 }
1266
1267 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1268         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1269         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1270 {
1271     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1272         return FALSE;
1273
1274     /* Source and/or destination need to be on the GL side */
1275     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1276         return FALSE;
1277
1278     switch (blit_op)
1279     {
1280         case WINED3D_BLIT_OP_COLOR_BLIT:
1281             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1282                 return FALSE;
1283             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1284                 return FALSE;
1285             break;
1286
1287         case WINED3D_BLIT_OP_DEPTH_BLIT:
1288             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1289                 return FALSE;
1290             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1291                 return FALSE;
1292             break;
1293
1294         default:
1295             return FALSE;
1296     }
1297
1298     if (!(src_format->id == dst_format->id
1299             || (is_identity_fixup(src_format->color_fixup)
1300             && is_identity_fixup(dst_format->color_fixup))))
1301         return FALSE;
1302
1303     return TRUE;
1304 }
1305
1306 /* This function checks if the primary render target uses the 8bit paletted format. */
1307 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1308 {
1309     if (device->fb.render_targets && device->fb.render_targets[0])
1310     {
1311         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1312         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1313                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1314             return TRUE;
1315     }
1316     return FALSE;
1317 }
1318
1319 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1320         DWORD color, struct wined3d_color *float_color)
1321 {
1322     const struct wined3d_format *format = surface->resource.format;
1323     const struct wined3d_device *device = surface->resource.device;
1324
1325     switch (format->id)
1326     {
1327         case WINED3DFMT_P8_UINT:
1328             if (surface->palette)
1329             {
1330                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1331                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1332                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1333             }
1334             else
1335             {
1336                 float_color->r = 0.0f;
1337                 float_color->g = 0.0f;
1338                 float_color->b = 0.0f;
1339             }
1340             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1341             break;
1342
1343         case WINED3DFMT_B5G6R5_UNORM:
1344             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1345             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1346             float_color->b = (color & 0x1f) / 31.0f;
1347             float_color->a = 1.0f;
1348             break;
1349
1350         case WINED3DFMT_B8G8R8_UNORM:
1351         case WINED3DFMT_B8G8R8X8_UNORM:
1352             float_color->r = D3DCOLOR_R(color);
1353             float_color->g = D3DCOLOR_G(color);
1354             float_color->b = D3DCOLOR_B(color);
1355             float_color->a = 1.0f;
1356             break;
1357
1358         case WINED3DFMT_B8G8R8A8_UNORM:
1359             float_color->r = D3DCOLOR_R(color);
1360             float_color->g = D3DCOLOR_G(color);
1361             float_color->b = D3DCOLOR_B(color);
1362             float_color->a = D3DCOLOR_A(color);
1363             break;
1364
1365         default:
1366             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1367             return FALSE;
1368     }
1369
1370     return TRUE;
1371 }
1372
1373 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1374 {
1375     const struct wined3d_format *format = surface->resource.format;
1376
1377     switch (format->id)
1378     {
1379         case WINED3DFMT_S1_UINT_D15_UNORM:
1380             *float_depth = depth / (float)0x00007fff;
1381             break;
1382
1383         case WINED3DFMT_D16_UNORM:
1384             *float_depth = depth / (float)0x0000ffff;
1385             break;
1386
1387         case WINED3DFMT_D24_UNORM_S8_UINT:
1388         case WINED3DFMT_X8D24_UNORM:
1389             *float_depth = depth / (float)0x00ffffff;
1390             break;
1391
1392         case WINED3DFMT_D32_UNORM:
1393             *float_depth = depth / (float)0xffffffff;
1394             break;
1395
1396         default:
1397             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1398             return FALSE;
1399     }
1400
1401     return TRUE;
1402 }
1403
1404 /* Do not call while under the GL lock. */
1405 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1406 {
1407     const struct wined3d_resource *resource = &surface->resource;
1408     struct wined3d_device *device = resource->device;
1409     const struct blit_shader *blitter;
1410
1411     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1412             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1413     if (!blitter)
1414     {
1415         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1416         return WINED3DERR_INVALIDCALL;
1417     }
1418
1419     return blitter->depth_fill(device, surface, rect, depth);
1420 }
1421
1422 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1423         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1424 {
1425     struct wined3d_device *device = src_surface->resource.device;
1426
1427     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1428             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1429             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1430         return WINED3DERR_INVALIDCALL;
1431
1432     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1433
1434     surface_modify_ds_location(dst_surface, SFLAG_INTEXTURE,
1435             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1436
1437     return WINED3D_OK;
1438 }
1439
1440 /* Do not call while under the GL lock. */
1441 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1442         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1443         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1444 {
1445     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1446     struct wined3d_device *device = dst_surface->resource.device;
1447     DWORD src_ds_flags, dst_ds_flags;
1448     RECT src_rect, dst_rect;
1449     BOOL scale, convert;
1450
1451     static const DWORD simple_blit = WINEDDBLT_ASYNC
1452             | WINEDDBLT_COLORFILL
1453             | WINEDDBLT_WAIT
1454             | WINEDDBLT_DEPTHFILL
1455             | WINEDDBLT_DONOTWAIT;
1456
1457     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1458             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1459             flags, fx, debug_d3dtexturefiltertype(filter));
1460     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1461
1462     if (fx)
1463     {
1464         TRACE("dwSize %#x.\n", fx->dwSize);
1465         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1466         TRACE("dwROP %#x.\n", fx->dwROP);
1467         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1468         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1469         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1470         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1471         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1472         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1473         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1474         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1475         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1476         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1477         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1478         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1479         TRACE("dwReserved %#x.\n", fx->dwReserved);
1480         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1481         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1482         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1483         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1484         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1485         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1486                 fx->ddckDestColorkey.color_space_low_value,
1487                 fx->ddckDestColorkey.color_space_high_value);
1488         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1489                 fx->ddckSrcColorkey.color_space_low_value,
1490                 fx->ddckSrcColorkey.color_space_high_value);
1491     }
1492
1493     if (dst_surface->resource.map_count || (src_surface && src_surface->resource.map_count))
1494     {
1495         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1496         return WINEDDERR_SURFACEBUSY;
1497     }
1498
1499     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1500
1501     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1502             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1503             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1504             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1505             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1506     {
1507         WARN("The application gave us a bad destination rectangle.\n");
1508         return WINEDDERR_INVALIDRECT;
1509     }
1510
1511     if (src_surface)
1512     {
1513         surface_get_rect(src_surface, src_rect_in, &src_rect);
1514
1515         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1516                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1517                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1518                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1519                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1520         {
1521             WARN("Application gave us bad source rectangle for Blt.\n");
1522             return WINEDDERR_INVALIDRECT;
1523         }
1524     }
1525     else
1526     {
1527         memset(&src_rect, 0, sizeof(src_rect));
1528     }
1529
1530     if (!fx || !(fx->dwDDFX))
1531         flags &= ~WINEDDBLT_DDFX;
1532
1533     if (flags & WINEDDBLT_WAIT)
1534         flags &= ~WINEDDBLT_WAIT;
1535
1536     if (flags & WINEDDBLT_ASYNC)
1537     {
1538         static unsigned int once;
1539
1540         if (!once++)
1541             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1542         flags &= ~WINEDDBLT_ASYNC;
1543     }
1544
1545     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1546     if (flags & WINEDDBLT_DONOTWAIT)
1547     {
1548         static unsigned int once;
1549
1550         if (!once++)
1551             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1552         flags &= ~WINEDDBLT_DONOTWAIT;
1553     }
1554
1555     if (!device->d3d_initialized)
1556     {
1557         WARN("D3D not initialized, using fallback.\n");
1558         goto cpu;
1559     }
1560
1561     /* We want to avoid invalidating the sysmem location for converted
1562      * surfaces, since otherwise we'd have to convert the data back when
1563      * locking them. */
1564     if (dst_surface->flags & SFLAG_CONVERTED)
1565     {
1566         WARN("Converted surface, using CPU blit.\n");
1567         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1568     }
1569
1570     if (flags & ~simple_blit)
1571     {
1572         WARN("Using fallback for complex blit (%#x).\n", flags);
1573         goto fallback;
1574     }
1575
1576     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1577         src_swapchain = src_surface->container.u.swapchain;
1578     else
1579         src_swapchain = NULL;
1580
1581     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1582         dst_swapchain = dst_surface->container.u.swapchain;
1583     else
1584         dst_swapchain = NULL;
1585
1586     /* This isn't strictly needed. FBO blits for example could deal with
1587      * cross-swapchain blits by first downloading the source to a texture
1588      * before switching to the destination context. We just have this here to
1589      * not have to deal with the issue, since cross-swapchain blits should be
1590      * rare. */
1591     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1592     {
1593         FIXME("Using fallback for cross-swapchain blit.\n");
1594         goto fallback;
1595     }
1596
1597     scale = src_surface
1598             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1599             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1600     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1601
1602     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1603     if (src_surface)
1604         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1605     else
1606         src_ds_flags = 0;
1607
1608     if (src_ds_flags || dst_ds_flags)
1609     {
1610         if (flags & WINEDDBLT_DEPTHFILL)
1611         {
1612             float depth;
1613
1614             TRACE("Depth fill.\n");
1615
1616             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1617                 return WINED3DERR_INVALIDCALL;
1618
1619             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1620                 return WINED3D_OK;
1621         }
1622         else
1623         {
1624             if (src_ds_flags != dst_ds_flags)
1625             {
1626                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1627                 return WINED3DERR_INVALIDCALL;
1628             }
1629
1630             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1631                 return WINED3D_OK;
1632         }
1633     }
1634     else
1635     {
1636         /* In principle this would apply to depth blits as well, but we don't
1637          * implement those in the CPU blitter at the moment. */
1638         if ((dst_surface->flags & SFLAG_INSYSMEM)
1639                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1640         {
1641             if (scale)
1642                 TRACE("Not doing sysmem blit because of scaling.\n");
1643             else if (convert)
1644                 TRACE("Not doing sysmem blit because of format conversion.\n");
1645             else
1646                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1647         }
1648
1649         if (flags & WINEDDBLT_COLORFILL)
1650         {
1651             struct wined3d_color color;
1652
1653             TRACE("Color fill.\n");
1654
1655             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1656                 goto fallback;
1657
1658             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1659                 return WINED3D_OK;
1660         }
1661         else
1662         {
1663             TRACE("Color blit.\n");
1664
1665             /* Upload */
1666             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1667             {
1668                 if (scale)
1669                     TRACE("Not doing upload because of scaling.\n");
1670                 else if (convert)
1671                     TRACE("Not doing upload because of format conversion.\n");
1672                 else
1673                 {
1674                     POINT dst_point = {dst_rect.left, dst_rect.top};
1675
1676                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1677                     {
1678                         if (!surface_is_offscreen(dst_surface))
1679                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1680                         return WINED3D_OK;
1681                     }
1682                 }
1683             }
1684
1685             /* Use present for back -> front blits. The idea behind this is
1686              * that present is potentially faster than a blit, in particular
1687              * when FBO blits aren't available. Some ddraw applications like
1688              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1689              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1690              * applications can't blit directly to the frontbuffer. */
1691             if (dst_swapchain && dst_swapchain->back_buffers
1692                     && dst_surface == dst_swapchain->front_buffer
1693                     && src_surface == dst_swapchain->back_buffers[0])
1694             {
1695                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1696
1697                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1698
1699                 /* Set the swap effect to COPY, we don't want the backbuffer
1700                  * to become undefined. */
1701                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1702                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1703                 dst_swapchain->desc.swap_effect = swap_effect;
1704
1705                 return WINED3D_OK;
1706             }
1707
1708             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1709                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1710                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1711             {
1712                 TRACE("Using FBO blit.\n");
1713
1714                 surface_blt_fbo(device, filter,
1715                         src_surface, src_surface->draw_binding, &src_rect,
1716                         dst_surface, dst_surface->draw_binding, &dst_rect);
1717                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1718                 return WINED3D_OK;
1719             }
1720
1721             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1722                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1723                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1724             {
1725                 TRACE("Using arbfp blit.\n");
1726
1727                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1728                     return WINED3D_OK;
1729             }
1730         }
1731     }
1732
1733 fallback:
1734
1735     /* Special cases for render targets. */
1736     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1737             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1738     {
1739         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1740                 src_surface, &src_rect, flags, fx, filter)))
1741             return WINED3D_OK;
1742     }
1743
1744 cpu:
1745
1746     /* For the rest call the X11 surface implementation. For render targets
1747      * this should be implemented OpenGL accelerated in BltOverride, other
1748      * blits are rather rare. */
1749     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1750 }
1751
1752 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1753         struct wined3d_surface *render_target)
1754 {
1755     TRACE("surface %p, render_target %p.\n", surface, render_target);
1756
1757     /* TODO: Check surface sizes, pools, etc. */
1758
1759     if (render_target->resource.multisample_type)
1760         return WINED3DERR_INVALIDCALL;
1761
1762     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1763 }
1764
1765 /* Context activation is done by the caller. */
1766 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1767 {
1768     if (surface->flags & SFLAG_DIBSECTION)
1769     {
1770         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1771     }
1772     else
1773     {
1774         if (!surface->resource.heapMemory)
1775             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1776         else if (!(surface->flags & SFLAG_CLIENT))
1777             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1778                     surface, surface->resource.heapMemory, surface->flags);
1779
1780         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1781                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1782     }
1783
1784     ENTER_GL();
1785     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1786     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1787     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1788             surface->resource.size, surface->resource.allocatedMemory));
1789     checkGLcall("glGetBufferSubDataARB");
1790     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1791     checkGLcall("glDeleteBuffersARB");
1792     LEAVE_GL();
1793
1794     surface->pbo = 0;
1795     surface->flags &= ~SFLAG_PBO;
1796 }
1797
1798 static BOOL surface_init_sysmem(struct wined3d_surface *surface)
1799 {
1800     if (!surface->resource.allocatedMemory)
1801     {
1802         if (!surface->resource.heapMemory)
1803         {
1804             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
1805                     surface->resource.size + RESOURCE_ALIGNMENT)))
1806             {
1807                 ERR("Failed to allocate memory.\n");
1808                 return FALSE;
1809             }
1810         }
1811         else if (!(surface->flags & SFLAG_CLIENT))
1812         {
1813             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1814                     surface, surface->resource.heapMemory, surface->flags);
1815         }
1816
1817         surface->resource.allocatedMemory =
1818             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1819     }
1820     else
1821     {
1822         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
1823     }
1824
1825     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1826
1827     return TRUE;
1828 }
1829
1830 /* Do not call while under the GL lock. */
1831 static void surface_unload(struct wined3d_resource *resource)
1832 {
1833     struct wined3d_surface *surface = surface_from_resource(resource);
1834     struct wined3d_renderbuffer_entry *entry, *entry2;
1835     struct wined3d_device *device = resource->device;
1836     const struct wined3d_gl_info *gl_info;
1837     struct wined3d_context *context;
1838
1839     TRACE("surface %p.\n", surface);
1840
1841     if (resource->pool == WINED3D_POOL_DEFAULT)
1842     {
1843         /* Default pool resources are supposed to be destroyed before Reset is called.
1844          * Implicit resources stay however. So this means we have an implicit render target
1845          * or depth stencil. The content may be destroyed, but we still have to tear down
1846          * opengl resources, so we cannot leave early.
1847          *
1848          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1849          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1850          * or the depth stencil into an FBO the texture or render buffer will be removed
1851          * and all flags get lost
1852          */
1853         if (!(surface->flags & SFLAG_PBO))
1854             surface_init_sysmem(surface);
1855         /* We also get here when the ddraw swapchain is destroyed, for example
1856          * for a mode switch. In this case this surface won't necessarily be
1857          * an implicit surface. We have to mark it lost so that the
1858          * application can restore it after the mode switch. */
1859         surface->flags |= SFLAG_LOST;
1860     }
1861     else
1862     {
1863         /* Load the surface into system memory */
1864         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1865         surface_modify_location(surface, surface->draw_binding, FALSE);
1866     }
1867     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1868     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1869     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1870
1871     context = context_acquire(device, NULL);
1872     gl_info = context->gl_info;
1873
1874     /* Destroy PBOs, but load them into real sysmem before */
1875     if (surface->flags & SFLAG_PBO)
1876         surface_remove_pbo(surface, gl_info);
1877
1878     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1879      * all application-created targets the application has to release the surface
1880      * before calling _Reset
1881      */
1882     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1883     {
1884         ENTER_GL();
1885         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1886         LEAVE_GL();
1887         list_remove(&entry->entry);
1888         HeapFree(GetProcessHeap(), 0, entry);
1889     }
1890     list_init(&surface->renderbuffers);
1891     surface->current_renderbuffer = NULL;
1892
1893     ENTER_GL();
1894
1895     /* If we're in a texture, the texture name belongs to the texture.
1896      * Otherwise, destroy it. */
1897     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1898     {
1899         glDeleteTextures(1, &surface->texture_name);
1900         surface->texture_name = 0;
1901         glDeleteTextures(1, &surface->texture_name_srgb);
1902         surface->texture_name_srgb = 0;
1903     }
1904     if (surface->rb_multisample)
1905     {
1906         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1907         surface->rb_multisample = 0;
1908     }
1909     if (surface->rb_resolved)
1910     {
1911         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1912         surface->rb_resolved = 0;
1913     }
1914
1915     LEAVE_GL();
1916
1917     context_release(context);
1918
1919     resource_unload(resource);
1920 }
1921
1922 static const struct wined3d_resource_ops surface_resource_ops =
1923 {
1924     surface_unload,
1925 };
1926
1927 static const struct wined3d_surface_ops surface_ops =
1928 {
1929     surface_private_setup,
1930     surface_realize_palette,
1931     surface_map,
1932     surface_unmap,
1933 };
1934
1935 /*****************************************************************************
1936  * Initializes the GDI surface, aka creates the DIB section we render to
1937  * The DIB section creation is done by calling GetDC, which will create the
1938  * section and releasing the dc to allow the app to use it. The dib section
1939  * will stay until the surface is released
1940  *
1941  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1942  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1943  * avoid confusion in the shared surface code.
1944  *
1945  * Returns:
1946  *  WINED3D_OK on success
1947  *  The return values of called methods on failure
1948  *
1949  *****************************************************************************/
1950 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1951 {
1952     HRESULT hr;
1953
1954     TRACE("surface %p.\n", surface);
1955
1956     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1957     {
1958         ERR("Overlays not yet supported by GDI surfaces.\n");
1959         return WINED3DERR_INVALIDCALL;
1960     }
1961
1962     /* Sysmem textures have memory already allocated - release it,
1963      * this avoids an unnecessary memcpy. */
1964     hr = surface_create_dib_section(surface);
1965     if (SUCCEEDED(hr))
1966     {
1967         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1968         surface->resource.heapMemory = NULL;
1969         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1970     }
1971
1972     /* We don't mind the nonpow2 stuff in GDI. */
1973     surface->pow2Width = surface->resource.width;
1974     surface->pow2Height = surface->resource.height;
1975
1976     return WINED3D_OK;
1977 }
1978
1979 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1980 {
1981     struct wined3d_palette *palette = surface->palette;
1982
1983     TRACE("surface %p.\n", surface);
1984
1985     if (!palette) return;
1986
1987     if (surface->flags & SFLAG_DIBSECTION)
1988     {
1989         RGBQUAD col[256];
1990         unsigned int i;
1991
1992         TRACE("Updating the DC's palette.\n");
1993
1994         for (i = 0; i < 256; ++i)
1995         {
1996             col[i].rgbRed = palette->palents[i].peRed;
1997             col[i].rgbGreen = palette->palents[i].peGreen;
1998             col[i].rgbBlue = palette->palents[i].peBlue;
1999             col[i].rgbReserved = 0;
2000         }
2001         SetDIBColorTable(surface->hDC, 0, 256, col);
2002     }
2003
2004     /* Update the image because of the palette change. Some games like e.g.
2005      * Red Alert call SetEntries a lot to implement fading. */
2006     /* Tell the swapchain to update the screen. */
2007     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2008     {
2009         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2010         if (surface == swapchain->front_buffer)
2011         {
2012             x11_copy_to_screen(swapchain, NULL);
2013         }
2014     }
2015 }
2016
2017 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2018 {
2019     TRACE("surface %p, rect %s, flags %#x.\n",
2020             surface, wine_dbgstr_rect(rect), flags);
2021
2022     if (!(surface->flags & SFLAG_DIBSECTION))
2023     {
2024         HRESULT hr;
2025
2026         /* This happens on gdi surfaces if the application set a user pointer
2027          * and resets it. Recreate the DIB section. */
2028         if (FAILED(hr = surface_create_dib_section(surface)))
2029         {
2030             ERR("Failed to create dib section, hr %#x.\n", hr);
2031             return;
2032         }
2033         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
2034         surface->resource.heapMemory = NULL;
2035         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2036     }
2037 }
2038
2039 static void gdi_surface_unmap(struct wined3d_surface *surface)
2040 {
2041     TRACE("surface %p.\n", surface);
2042
2043     /* Tell the swapchain to update the screen. */
2044     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2045     {
2046         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2047         if (surface == swapchain->front_buffer)
2048         {
2049             x11_copy_to_screen(swapchain, &surface->lockedRect);
2050         }
2051     }
2052
2053     memset(&surface->lockedRect, 0, sizeof(RECT));
2054 }
2055
2056 static const struct wined3d_surface_ops gdi_surface_ops =
2057 {
2058     gdi_surface_private_setup,
2059     gdi_surface_realize_palette,
2060     gdi_surface_map,
2061     gdi_surface_unmap,
2062 };
2063
2064 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2065 {
2066     GLuint *name;
2067     DWORD flag;
2068
2069     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2070
2071     if(srgb)
2072     {
2073         name = &surface->texture_name_srgb;
2074         flag = SFLAG_INSRGBTEX;
2075     }
2076     else
2077     {
2078         name = &surface->texture_name;
2079         flag = SFLAG_INTEXTURE;
2080     }
2081
2082     if (!*name && new_name)
2083     {
2084         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2085          * surface has no texture name yet. See if we can get rid of this. */
2086         if (surface->flags & flag)
2087         {
2088             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2089             surface_modify_location(surface, flag, FALSE);
2090         }
2091     }
2092
2093     *name = new_name;
2094     surface_force_reload(surface);
2095 }
2096
2097 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2098 {
2099     TRACE("surface %p, target %#x.\n", surface, target);
2100
2101     if (surface->texture_target != target)
2102     {
2103         if (target == GL_TEXTURE_RECTANGLE_ARB)
2104         {
2105             surface->flags &= ~SFLAG_NORMCOORD;
2106         }
2107         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2108         {
2109             surface->flags |= SFLAG_NORMCOORD;
2110         }
2111     }
2112     surface->texture_target = target;
2113     surface_force_reload(surface);
2114 }
2115
2116 /* This call just downloads data, the caller is responsible for binding the
2117  * correct texture. */
2118 /* Context activation is done by the caller. */
2119 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2120 {
2121     const struct wined3d_format *format = surface->resource.format;
2122
2123     /* Only support read back of converted P8 surfaces. */
2124     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2125     {
2126         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2127         return;
2128     }
2129
2130     ENTER_GL();
2131
2132     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2133     {
2134         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2135                 surface, surface->texture_level, format->glFormat, format->glType,
2136                 surface->resource.allocatedMemory);
2137
2138         if (surface->flags & SFLAG_PBO)
2139         {
2140             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2141             checkGLcall("glBindBufferARB");
2142             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2143             checkGLcall("glGetCompressedTexImageARB");
2144             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2145             checkGLcall("glBindBufferARB");
2146         }
2147         else
2148         {
2149             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2150                     surface->texture_level, surface->resource.allocatedMemory));
2151             checkGLcall("glGetCompressedTexImageARB");
2152         }
2153
2154         LEAVE_GL();
2155     }
2156     else
2157     {
2158         void *mem;
2159         GLenum gl_format = format->glFormat;
2160         GLenum gl_type = format->glType;
2161         int src_pitch = 0;
2162         int dst_pitch = 0;
2163
2164         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2165         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2166         {
2167             gl_format = GL_ALPHA;
2168             gl_type = GL_UNSIGNED_BYTE;
2169         }
2170
2171         if (surface->flags & SFLAG_NONPOW2)
2172         {
2173             unsigned char alignment = surface->resource.device->surface_alignment;
2174             src_pitch = format->byte_count * surface->pow2Width;
2175             dst_pitch = wined3d_surface_get_pitch(surface);
2176             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2177             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2178         }
2179         else
2180         {
2181             mem = surface->resource.allocatedMemory;
2182         }
2183
2184         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2185                 surface, surface->texture_level, gl_format, gl_type, mem);
2186
2187         if (surface->flags & SFLAG_PBO)
2188         {
2189             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2190             checkGLcall("glBindBufferARB");
2191
2192             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2193             checkGLcall("glGetTexImage");
2194
2195             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2196             checkGLcall("glBindBufferARB");
2197         }
2198         else
2199         {
2200             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2201             checkGLcall("glGetTexImage");
2202         }
2203         LEAVE_GL();
2204
2205         if (surface->flags & SFLAG_NONPOW2)
2206         {
2207             const BYTE *src_data;
2208             BYTE *dst_data;
2209             UINT y;
2210             /*
2211              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2212              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2213              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2214              *
2215              * We're doing this...
2216              *
2217              * instead of boxing the texture :
2218              * |<-texture width ->|  -->pow2width|   /\
2219              * |111111111111111111|              |   |
2220              * |222 Texture 222222| boxed empty  | texture height
2221              * |3333 Data 33333333|              |   |
2222              * |444444444444444444|              |   \/
2223              * -----------------------------------   |
2224              * |     boxed  empty | boxed empty  | pow2height
2225              * |                  |              |   \/
2226              * -----------------------------------
2227              *
2228              *
2229              * we're repacking the data to the expected texture width
2230              *
2231              * |<-texture width ->|  -->pow2width|   /\
2232              * |111111111111111111222222222222222|   |
2233              * |222333333333333333333444444444444| texture height
2234              * |444444                           |   |
2235              * |                                 |   \/
2236              * |                                 |   |
2237              * |            empty                | pow2height
2238              * |                                 |   \/
2239              * -----------------------------------
2240              *
2241              * == is the same as
2242              *
2243              * |<-texture width ->|    /\
2244              * |111111111111111111|
2245              * |222222222222222222|texture height
2246              * |333333333333333333|
2247              * |444444444444444444|    \/
2248              * --------------------
2249              *
2250              * this also means that any references to allocatedMemory should work with the data as if were a
2251              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2252              *
2253              * internally the texture is still stored in a boxed format so any references to textureName will
2254              * get a boxed texture with width pow2width and not a texture of width resource.width.
2255              *
2256              * Performance should not be an issue, because applications normally do not lock the surfaces when
2257              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2258              * and doesn't have to be re-read. */
2259             src_data = mem;
2260             dst_data = surface->resource.allocatedMemory;
2261             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2262             for (y = 1; y < surface->resource.height; ++y)
2263             {
2264                 /* skip the first row */
2265                 src_data += src_pitch;
2266                 dst_data += dst_pitch;
2267                 memcpy(dst_data, src_data, dst_pitch);
2268             }
2269
2270             HeapFree(GetProcessHeap(), 0, mem);
2271         }
2272     }
2273
2274     /* Surface has now been downloaded */
2275     surface->flags |= SFLAG_INSYSMEM;
2276 }
2277
2278 /* This call just uploads data, the caller is responsible for binding the
2279  * correct texture. */
2280 /* Context activation is done by the caller. */
2281 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2282         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2283         BOOL srgb, const struct wined3d_bo_address *data)
2284 {
2285     UINT update_w = src_rect->right - src_rect->left;
2286     UINT update_h = src_rect->bottom - src_rect->top;
2287
2288     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2289             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2290             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2291
2292     if (surface->resource.map_count)
2293     {
2294         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2295         surface->flags |= SFLAG_PIN_SYSMEM;
2296     }
2297
2298     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2299     {
2300         update_h *= format->height_scale.numerator;
2301         update_h /= format->height_scale.denominator;
2302     }
2303
2304     ENTER_GL();
2305
2306     if (data->buffer_object)
2307     {
2308         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2309         checkGLcall("glBindBufferARB");
2310     }
2311
2312     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2313     {
2314         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2315         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2316         const BYTE *addr = data->addr;
2317         GLenum internal;
2318
2319         addr += (src_rect->top / format->block_height) * src_pitch;
2320         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2321
2322         if (srgb)
2323             internal = format->glGammaInternal;
2324         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2325             internal = format->rtInternal;
2326         else
2327             internal = format->glInternal;
2328
2329         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2330                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2331                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2332
2333         if (row_length == src_pitch)
2334         {
2335             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2336                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2337         }
2338         else
2339         {
2340             UINT row, y;
2341
2342             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2343              * can't use the unpack row length like below. */
2344             for (row = 0, y = dst_point->y; row < row_count; ++row)
2345             {
2346                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2347                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2348                 y += format->block_height;
2349                 addr += src_pitch;
2350             }
2351         }
2352         checkGLcall("glCompressedTexSubImage2DARB");
2353     }
2354     else
2355     {
2356         const BYTE *addr = data->addr;
2357
2358         addr += src_rect->top * src_pitch;
2359         addr += src_rect->left * format->byte_count;
2360
2361         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2362                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2363                 update_w, update_h, format->glFormat, format->glType, addr);
2364
2365         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2366         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2367                 update_w, update_h, format->glFormat, format->glType, addr);
2368         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2369         checkGLcall("glTexSubImage2D");
2370     }
2371
2372     if (data->buffer_object)
2373     {
2374         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2375         checkGLcall("glBindBufferARB");
2376     }
2377
2378     LEAVE_GL();
2379
2380     if (wined3d_settings.strict_draw_ordering)
2381         wglFlush();
2382
2383     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2384     {
2385         struct wined3d_device *device = surface->resource.device;
2386         unsigned int i;
2387
2388         for (i = 0; i < device->context_count; ++i)
2389         {
2390             context_surface_update(device->contexts[i], surface);
2391         }
2392     }
2393 }
2394
2395 static HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck, BOOL use_texturing,
2396         struct wined3d_format *format, enum wined3d_conversion_type *conversion_type)
2397 {
2398     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2399     const struct wined3d_device *device = surface->resource.device;
2400     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
2401     BOOL blit_supported = FALSE;
2402
2403     /* Copy the default values from the surface. Below we might perform fixups */
2404     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
2405     *format = *surface->resource.format;
2406     *conversion_type = WINED3D_CT_NONE;
2407
2408     /* Ok, now look if we have to do any conversion */
2409     switch (surface->resource.format->id)
2410     {
2411         case WINED3DFMT_P8_UINT:
2412             /* Below the call to blit_supported is disabled for Wine 1.2
2413              * because the function isn't operating correctly yet. At the
2414              * moment 8-bit blits are handled in software and if certain GL
2415              * extensions are around, surface conversion is performed at
2416              * upload time. The blit_supported call recognizes it as a
2417              * destination fixup. This type of upload 'fixup' and 8-bit to
2418              * 8-bit blits need to be handled by the blit_shader.
2419              * TODO: get rid of this #if 0. */
2420 #if 0
2421             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
2422                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
2423                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
2424 #endif
2425             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
2426
2427             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
2428              * texturing. Further also use conversion in case of color keying.
2429              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
2430              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
2431              * conflicts with this.
2432              */
2433             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
2434                     || colorkey_active || !use_texturing)
2435             {
2436                 format->glFormat = GL_RGBA;
2437                 format->glInternal = GL_RGBA;
2438                 format->glType = GL_UNSIGNED_BYTE;
2439                 format->conv_byte_count = 4;
2440                 if (colorkey_active)
2441                     *conversion_type = WINED3D_CT_PALETTED_CK;
2442                 else
2443                     *conversion_type = WINED3D_CT_PALETTED;
2444             }
2445             break;
2446
2447         case WINED3DFMT_B2G3R3_UNORM:
2448             /* **********************
2449                 GL_UNSIGNED_BYTE_3_3_2
2450                 ********************** */
2451             if (colorkey_active) {
2452                 /* This texture format will never be used.. So do not care about color keying
2453                     up until the point in time it will be needed :-) */
2454                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
2455             }
2456             break;
2457
2458         case WINED3DFMT_B5G6R5_UNORM:
2459             if (colorkey_active)
2460             {
2461                 *conversion_type = WINED3D_CT_CK_565;
2462                 format->glFormat = GL_RGBA;
2463                 format->glInternal = GL_RGB5_A1;
2464                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
2465                 format->conv_byte_count = 2;
2466             }
2467             break;
2468
2469         case WINED3DFMT_B5G5R5X1_UNORM:
2470             if (colorkey_active)
2471             {
2472                 *conversion_type = WINED3D_CT_CK_5551;
2473                 format->glFormat = GL_BGRA;
2474                 format->glInternal = GL_RGB5_A1;
2475                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
2476                 format->conv_byte_count = 2;
2477             }
2478             break;
2479
2480         case WINED3DFMT_B8G8R8_UNORM:
2481             if (colorkey_active)
2482             {
2483                 *conversion_type = WINED3D_CT_CK_RGB24;
2484                 format->glFormat = GL_RGBA;
2485                 format->glInternal = GL_RGBA8;
2486                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2487                 format->conv_byte_count = 4;
2488             }
2489             break;
2490
2491         case WINED3DFMT_B8G8R8X8_UNORM:
2492             if (colorkey_active)
2493             {
2494                 *conversion_type = WINED3D_CT_RGB32_888;
2495                 format->glFormat = GL_RGBA;
2496                 format->glInternal = GL_RGBA8;
2497                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2498                 format->conv_byte_count = 4;
2499             }
2500             break;
2501
2502         case WINED3DFMT_B8G8R8A8_UNORM:
2503             if (colorkey_active)
2504             {
2505                 *conversion_type = WINED3D_CT_CK_ARGB32;
2506                 format->conv_byte_count = 4;
2507             }
2508             break;
2509
2510         default:
2511             break;
2512     }
2513
2514     if (*conversion_type != WINED3D_CT_NONE)
2515     {
2516         format->rtInternal = format->glInternal;
2517         format->glGammaInternal = format->glInternal;
2518     }
2519
2520     return WINED3D_OK;
2521 }
2522
2523 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2524         struct wined3d_surface *src_surface, const RECT *src_rect)
2525 {
2526     const struct wined3d_format *src_format;
2527     const struct wined3d_format *dst_format;
2528     const struct wined3d_gl_info *gl_info;
2529     enum wined3d_conversion_type convert;
2530     struct wined3d_context *context;
2531     struct wined3d_bo_address data;
2532     struct wined3d_format format;
2533     UINT update_w, update_h;
2534     UINT dst_w, dst_h;
2535     UINT src_w, src_h;
2536     UINT src_pitch;
2537     POINT p;
2538     RECT r;
2539
2540     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2541             dst_surface, wine_dbgstr_point(dst_point),
2542             src_surface, wine_dbgstr_rect(src_rect));
2543
2544     src_format = src_surface->resource.format;
2545     dst_format = dst_surface->resource.format;
2546
2547     if (src_format->id != dst_format->id)
2548     {
2549         WARN("Source and destination surfaces should have the same format.\n");
2550         return WINED3DERR_INVALIDCALL;
2551     }
2552
2553     if (!dst_point)
2554     {
2555         p.x = 0;
2556         p.y = 0;
2557         dst_point = &p;
2558     }
2559     else if (dst_point->x < 0 || dst_point->y < 0)
2560     {
2561         WARN("Invalid destination point.\n");
2562         return WINED3DERR_INVALIDCALL;
2563     }
2564
2565     if (!src_rect)
2566     {
2567         r.left = 0;
2568         r.top = 0;
2569         r.right = src_surface->resource.width;
2570         r.bottom = src_surface->resource.height;
2571         src_rect = &r;
2572     }
2573     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2574             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2575     {
2576         WARN("Invalid source rectangle.\n");
2577         return WINED3DERR_INVALIDCALL;
2578     }
2579
2580     src_w = src_surface->resource.width;
2581     src_h = src_surface->resource.height;
2582
2583     dst_w = dst_surface->resource.width;
2584     dst_h = dst_surface->resource.height;
2585
2586     update_w = src_rect->right - src_rect->left;
2587     update_h = src_rect->bottom - src_rect->top;
2588
2589     if (update_w > dst_w || dst_point->x > dst_w - update_w
2590             || update_h > dst_h || dst_point->y > dst_h - update_h)
2591     {
2592         WARN("Destination out of bounds.\n");
2593         return WINED3DERR_INVALIDCALL;
2594     }
2595
2596     /* NPOT block sizes would be silly. */
2597     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2598             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2599             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2600     {
2601         WARN("Update rect not block-aligned.\n");
2602         return WINED3DERR_INVALIDCALL;
2603     }
2604
2605     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2606     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2607     if (convert != WINED3D_CT_NONE || format.convert)
2608     {
2609         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2610         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2611     }
2612
2613     context = context_acquire(dst_surface->resource.device, NULL);
2614     gl_info = context->gl_info;
2615
2616     /* Only load the surface for partial updates. For newly allocated texture
2617      * the texture wouldn't be the current location, and we'd upload zeroes
2618      * just to overwrite them again. */
2619     if (update_w == dst_w && update_h == dst_h)
2620         surface_prepare_texture(dst_surface, context, FALSE);
2621     else
2622         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2623     surface_bind(dst_surface, context, FALSE);
2624
2625     data.buffer_object = src_surface->pbo;
2626     data.addr = src_surface->resource.allocatedMemory;
2627     src_pitch = wined3d_surface_get_pitch(src_surface);
2628
2629     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2630
2631     invalidate_active_texture(dst_surface->resource.device, context);
2632
2633     context_release(context);
2634
2635     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2636     return WINED3D_OK;
2637 }
2638
2639 /* This call just allocates the texture, the caller is responsible for binding
2640  * the correct texture. */
2641 /* Context activation is done by the caller. */
2642 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2643         const struct wined3d_format *format, BOOL srgb)
2644 {
2645     BOOL enable_client_storage = FALSE;
2646     GLsizei width = surface->pow2Width;
2647     GLsizei height = surface->pow2Height;
2648     const BYTE *mem = NULL;
2649     GLenum internal;
2650
2651     if (srgb)
2652     {
2653         internal = format->glGammaInternal;
2654     }
2655     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2656     {
2657         internal = format->rtInternal;
2658     }
2659     else
2660     {
2661         internal = format->glInternal;
2662     }
2663
2664     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2665     {
2666         height *= format->height_scale.numerator;
2667         height /= format->height_scale.denominator;
2668     }
2669
2670     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2671             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2672             internal, width, height, format->glFormat, format->glType);
2673
2674     ENTER_GL();
2675
2676     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2677     {
2678         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2679                 || !surface->resource.allocatedMemory)
2680         {
2681             /* In some cases we want to disable client storage.
2682              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2683              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2684              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2685              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2686              */
2687             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2688             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2689             surface->flags &= ~SFLAG_CLIENT;
2690             enable_client_storage = TRUE;
2691         }
2692         else
2693         {
2694             surface->flags |= SFLAG_CLIENT;
2695
2696             /* Point OpenGL to our allocated texture memory. Do not use
2697              * resource.allocatedMemory here because it might point into a
2698              * PBO. Instead use heapMemory, but get the alignment right. */
2699             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2700                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2701         }
2702     }
2703
2704     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2705     {
2706         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2707                 internal, width, height, 0, surface->resource.size, mem));
2708         checkGLcall("glCompressedTexImage2DARB");
2709     }
2710     else
2711     {
2712         glTexImage2D(surface->texture_target, surface->texture_level,
2713                 internal, width, height, 0, format->glFormat, format->glType, mem);
2714         checkGLcall("glTexImage2D");
2715     }
2716
2717     if(enable_client_storage) {
2718         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2719         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2720     }
2721     LEAVE_GL();
2722 }
2723
2724 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2725  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2726 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2727 /* GL locking is done by the caller */
2728 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2729 {
2730     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2731     struct wined3d_renderbuffer_entry *entry;
2732     GLuint renderbuffer = 0;
2733     unsigned int src_width, src_height;
2734     unsigned int width, height;
2735
2736     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2737     {
2738         width = rt->pow2Width;
2739         height = rt->pow2Height;
2740     }
2741     else
2742     {
2743         width = surface->pow2Width;
2744         height = surface->pow2Height;
2745     }
2746
2747     src_width = surface->pow2Width;
2748     src_height = surface->pow2Height;
2749
2750     /* A depth stencil smaller than the render target is not valid */
2751     if (width > src_width || height > src_height) return;
2752
2753     /* Remove any renderbuffer set if the sizes match */
2754     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2755             || (width == src_width && height == src_height))
2756     {
2757         surface->current_renderbuffer = NULL;
2758         return;
2759     }
2760
2761     /* Look if we've already got a renderbuffer of the correct dimensions */
2762     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2763     {
2764         if (entry->width == width && entry->height == height)
2765         {
2766             renderbuffer = entry->id;
2767             surface->current_renderbuffer = entry;
2768             break;
2769         }
2770     }
2771
2772     if (!renderbuffer)
2773     {
2774         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2775         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2776         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2777                 surface->resource.format->glInternal, width, height);
2778
2779         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2780         entry->width = width;
2781         entry->height = height;
2782         entry->id = renderbuffer;
2783         list_add_head(&surface->renderbuffers, &entry->entry);
2784
2785         surface->current_renderbuffer = entry;
2786     }
2787
2788     checkGLcall("set_compatible_renderbuffer");
2789 }
2790
2791 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2792 {
2793     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2794
2795     TRACE("surface %p.\n", surface);
2796
2797     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2798     {
2799         ERR("Surface %p is not on a swapchain.\n", surface);
2800         return GL_NONE;
2801     }
2802
2803     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2804     {
2805         if (swapchain->render_to_fbo)
2806         {
2807             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2808             return GL_COLOR_ATTACHMENT0;
2809         }
2810         TRACE("Returning GL_BACK\n");
2811         return GL_BACK;
2812     }
2813     else if (surface == swapchain->front_buffer)
2814     {
2815         TRACE("Returning GL_FRONT\n");
2816         return GL_FRONT;
2817     }
2818
2819     FIXME("Higher back buffer, returning GL_BACK\n");
2820     return GL_BACK;
2821 }
2822
2823 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2824 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2825 {
2826     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2827
2828     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2829         /* No partial locking for textures yet. */
2830         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2831
2832     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2833     if (dirty_rect)
2834     {
2835         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2836         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2837         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2838         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2839     }
2840     else
2841     {
2842         surface->dirtyRect.left = 0;
2843         surface->dirtyRect.top = 0;
2844         surface->dirtyRect.right = surface->resource.width;
2845         surface->dirtyRect.bottom = surface->resource.height;
2846     }
2847
2848     /* if the container is a texture then mark it dirty. */
2849     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2850     {
2851         TRACE("Passing to container.\n");
2852         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2853     }
2854 }
2855
2856 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2857 {
2858     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2859     BOOL ck_changed;
2860
2861     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2862
2863     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2864     {
2865         ERR("Not supported on scratch surfaces.\n");
2866         return WINED3DERR_INVALIDCALL;
2867     }
2868
2869     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2870
2871     /* Reload if either the texture and sysmem have different ideas about the
2872      * color key, or the actual key values changed. */
2873     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2874             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2875             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2876     {
2877         TRACE("Reloading because of color keying\n");
2878         /* To perform the color key conversion we need a sysmem copy of
2879          * the surface. Make sure we have it. */
2880
2881         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2882         /* Make sure the texture is reloaded because of the color key change,
2883          * this kills performance though :( */
2884         /* TODO: This is not necessarily needed with hw palettized texture support. */
2885         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2886         /* Switching color keying on / off may change the internal format. */
2887         if (ck_changed)
2888             surface_force_reload(surface);
2889     }
2890     else if (!(surface->flags & flag))
2891     {
2892         TRACE("Reloading because surface is dirty.\n");
2893     }
2894     else
2895     {
2896         TRACE("surface is already in texture\n");
2897         return WINED3D_OK;
2898     }
2899
2900     /* No partial locking for textures yet. */
2901     surface_load_location(surface, flag, NULL);
2902     surface_evict_sysmem(surface);
2903
2904     return WINED3D_OK;
2905 }
2906
/* See also float_16_to_32() in wined3d_private.h */
/* Converts a 32 bit float to a 16 bit half float (1 sign bit, 5 exponent
 * bits with a bias of 15, 10 mantissa bits).
 *
 * Zero maps to 0x0000, NaN to 0x7c01 and infinities to 0x7c00 / 0xfc00.
 * Values too large for a half float become infinity, values too small are
 * flushed towards zero through the denormal range. The mantissa is rounded
 * to nearest, ties away from zero. */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The value is scaled into [2^10, 2^11), so that its integer part is the
     * 11 bit significand, including the implicit leading one. */
    const float significand_min = 1024.0f; /* 2^10 */
    const float significand_max = 2048.0f; /* 2^11 */
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    if (tmp < significand_min)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < significand_min);
    }
    else if (tmp >= significand_max)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= significand_max);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 bit significand (2047.5 -> 2048).
     * Renormalize in that case; otherwise the carry would be masked away
     * below and the result would be off by a factor of two. */
    if (mantissa == 0x800)
    {
        mantissa >>= 1;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2969
2970 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2971 {
2972     ULONG refcount;
2973
2974     TRACE("Surface %p, container %p of type %#x.\n",
2975             surface, surface->container.u.base, surface->container.type);
2976
2977     switch (surface->container.type)
2978     {
2979         case WINED3D_CONTAINER_TEXTURE:
2980             return wined3d_texture_incref(surface->container.u.texture);
2981
2982         case WINED3D_CONTAINER_SWAPCHAIN:
2983             return wined3d_swapchain_incref(surface->container.u.swapchain);
2984
2985         default:
2986             ERR("Unhandled container type %#x.\n", surface->container.type);
2987         case WINED3D_CONTAINER_NONE:
2988             break;
2989     }
2990
2991     refcount = InterlockedIncrement(&surface->resource.ref);
2992     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2993
2994     return refcount;
2995 }
2996
2997 /* Do not call while under the GL lock. */
2998 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2999 {
3000     ULONG refcount;
3001
3002     TRACE("Surface %p, container %p of type %#x.\n",
3003             surface, surface->container.u.base, surface->container.type);
3004
3005     switch (surface->container.type)
3006     {
3007         case WINED3D_CONTAINER_TEXTURE:
3008             return wined3d_texture_decref(surface->container.u.texture);
3009
3010         case WINED3D_CONTAINER_SWAPCHAIN:
3011             return wined3d_swapchain_decref(surface->container.u.swapchain);
3012
3013         default:
3014             ERR("Unhandled container type %#x.\n", surface->container.type);
3015         case WINED3D_CONTAINER_NONE:
3016             break;
3017     }
3018
3019     refcount = InterlockedDecrement(&surface->resource.ref);
3020     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
3021
3022     if (!refcount)
3023     {
3024         surface_cleanup(surface);
3025         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
3026
3027         TRACE("Destroyed surface %p.\n", surface);
3028         HeapFree(GetProcessHeap(), 0, surface);
3029     }
3030
3031     return refcount;
3032 }
3033
3034 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
3035 {
3036     return resource_set_priority(&surface->resource, priority);
3037 }
3038
3039 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
3040 {
3041     return resource_get_priority(&surface->resource);
3042 }
3043
3044 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
3045 {
3046     TRACE("surface %p.\n", surface);
3047
3048     if (!surface->resource.device->d3d_initialized)
3049     {
3050         ERR("D3D not initialized.\n");
3051         return;
3052     }
3053
3054     surface_internal_preload(surface, SRGB_ANY);
3055 }
3056
3057 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
3058 {
3059     TRACE("surface %p.\n", surface);
3060
3061     return surface->resource.parent;
3062 }
3063
3064 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
3065 {
3066     TRACE("surface %p.\n", surface);
3067
3068     return &surface->resource;
3069 }
3070
3071 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
3072 {
3073     TRACE("surface %p, flags %#x.\n", surface, flags);
3074
3075     switch (flags)
3076     {
3077         case WINEDDGBS_CANBLT:
3078         case WINEDDGBS_ISBLTDONE:
3079             return WINED3D_OK;
3080
3081         default:
3082             return WINED3DERR_INVALIDCALL;
3083     }
3084 }
3085
3086 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
3087 {
3088     TRACE("surface %p, flags %#x.\n", surface, flags);
3089
3090     /* XXX: DDERR_INVALIDSURFACETYPE */
3091
3092     switch (flags)
3093     {
3094         case WINEDDGFS_CANFLIP:
3095         case WINEDDGFS_ISFLIPDONE:
3096             return WINED3D_OK;
3097
3098         default:
3099             return WINED3DERR_INVALIDCALL;
3100     }
3101 }
3102
3103 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
3104 {
3105     TRACE("surface %p.\n", surface);
3106
3107     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
3108     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
3109 }
3110
3111 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
3112 {
3113     TRACE("surface %p.\n", surface);
3114
3115     surface->flags &= ~SFLAG_LOST;
3116     return WINED3D_OK;
3117 }
3118
3119 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
3120 {
3121     TRACE("surface %p, palette %p.\n", surface, palette);
3122
3123     if (surface->palette == palette)
3124     {
3125         TRACE("Nop palette change.\n");
3126         return WINED3D_OK;
3127     }
3128
3129     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3130         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3131
3132     surface->palette = palette;
3133
3134     if (palette)
3135     {
3136         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3137             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3138
3139         surface->surface_ops->surface_realize_palette(surface);
3140     }
3141
3142     return WINED3D_OK;
3143 }
3144
3145 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3146         DWORD flags, const struct wined3d_color_key *color_key)
3147 {
3148     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3149
3150     if (flags & WINEDDCKEY_COLORSPACE)
3151     {
3152         FIXME(" colorkey value not supported (%08x) !\n", flags);
3153         return WINED3DERR_INVALIDCALL;
3154     }
3155
3156     /* Dirtify the surface, but only if a key was changed. */
3157     if (color_key)
3158     {
3159         switch (flags & ~WINEDDCKEY_COLORSPACE)
3160         {
3161             case WINEDDCKEY_DESTBLT:
3162                 surface->dst_blt_color_key = *color_key;
3163                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3164                 break;
3165
3166             case WINEDDCKEY_DESTOVERLAY:
3167                 surface->dst_overlay_color_key = *color_key;
3168                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3169                 break;
3170
3171             case WINEDDCKEY_SRCOVERLAY:
3172                 surface->src_overlay_color_key = *color_key;
3173                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3174                 break;
3175
3176             case WINEDDCKEY_SRCBLT:
3177                 surface->src_blt_color_key = *color_key;
3178                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3179                 break;
3180         }
3181     }
3182     else
3183     {
3184         switch (flags & ~WINEDDCKEY_COLORSPACE)
3185         {
3186             case WINEDDCKEY_DESTBLT:
3187                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3188                 break;
3189
3190             case WINEDDCKEY_DESTOVERLAY:
3191                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3192                 break;
3193
3194             case WINEDDCKEY_SRCOVERLAY:
3195                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3196                 break;
3197
3198             case WINEDDCKEY_SRCBLT:
3199                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3200                 break;
3201         }
3202     }
3203
3204     return WINED3D_OK;
3205 }
3206
3207 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3208 {
3209     TRACE("surface %p.\n", surface);
3210
3211     return surface->palette;
3212 }
3213
3214 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3215 {
3216     const struct wined3d_format *format = surface->resource.format;
3217     DWORD pitch;
3218
3219     TRACE("surface %p.\n", surface);
3220
3221     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3222     {
3223         /* Since compressed formats are block based, pitch means the amount of
3224          * bytes to the next row of block rather than the next row of pixels. */
3225         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3226         pitch = row_block_count * format->block_byte_count;
3227     }
3228     else
3229     {
3230         unsigned char alignment = surface->resource.device->surface_alignment;
3231         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3232         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3233     }
3234
3235     TRACE("Returning %u.\n", pitch);
3236
3237     return pitch;
3238 }
3239
3240 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3241 {
3242     TRACE("surface %p, mem %p.\n", surface, mem);
3243
3244     if (surface->resource.map_count || (surface->flags & SFLAG_DCINUSE))
3245     {
3246         WARN("Surface is mapped or the DC is in use.\n");
3247         return WINED3DERR_INVALIDCALL;
3248     }
3249
3250     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3251     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3252     {
3253         ERR("Not supported on render targets.\n");
3254         return WINED3DERR_INVALIDCALL;
3255     }
3256
3257     if (mem && mem != surface->resource.allocatedMemory)
3258     {
3259         void *release = NULL;
3260
3261         /* Do I have to copy the old surface content? */
3262         if (surface->flags & SFLAG_DIBSECTION)
3263         {
3264             DeleteDC(surface->hDC);
3265             DeleteObject(surface->dib.DIBsection);
3266             surface->dib.bitmap_data = NULL;
3267             surface->resource.allocatedMemory = NULL;
3268             surface->hDC = NULL;
3269             surface->flags &= ~SFLAG_DIBSECTION;
3270         }
3271         else if (!(surface->flags & SFLAG_USERPTR))
3272         {
3273             release = surface->resource.heapMemory;
3274             surface->resource.heapMemory = NULL;
3275         }
3276         surface->resource.allocatedMemory = mem;
3277         surface->flags |= SFLAG_USERPTR;
3278
3279         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3280         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3281
3282         /* For client textures OpenGL has to be notified. */
3283         if (surface->flags & SFLAG_CLIENT)
3284             surface_release_client_storage(surface);
3285
3286         /* Now free the old memory if any. */
3287         HeapFree(GetProcessHeap(), 0, release);
3288     }
3289     else if (surface->flags & SFLAG_USERPTR)
3290     {
3291         /* HeapMemory should be NULL already. */
3292         if (surface->resource.heapMemory)
3293             ERR("User pointer surface has heap memory allocated.\n");
3294
3295         if (!mem)
3296         {
3297             surface->resource.allocatedMemory = NULL;
3298             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3299
3300             if (surface->flags & SFLAG_CLIENT)
3301                 surface_release_client_storage(surface);
3302
3303             surface_prepare_system_memory(surface);
3304         }
3305
3306         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3307     }
3308
3309     return WINED3D_OK;
3310 }
3311
3312 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3313 {
3314     LONG w, h;
3315
3316     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3317
3318     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3319     {
3320         WARN("Not an overlay surface.\n");
3321         return WINEDDERR_NOTAOVERLAYSURFACE;
3322     }
3323
3324     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3325     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3326     surface->overlay_destrect.left = x;
3327     surface->overlay_destrect.top = y;
3328     surface->overlay_destrect.right = x + w;
3329     surface->overlay_destrect.bottom = y + h;
3330
3331     surface_draw_overlay(surface);
3332
3333     return WINED3D_OK;
3334 }
3335
3336 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3337 {
3338     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3339
3340     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3341     {
3342         TRACE("Not an overlay surface.\n");
3343         return WINEDDERR_NOTAOVERLAYSURFACE;
3344     }
3345
3346     if (!surface->overlay_dest)
3347     {
3348         TRACE("Overlay not visible.\n");
3349         *x = 0;
3350         *y = 0;
3351         return WINEDDERR_OVERLAYNOTVISIBLE;
3352     }
3353
3354     *x = surface->overlay_destrect.left;
3355     *y = surface->overlay_destrect.top;
3356
3357     TRACE("Returning position %d, %d.\n", *x, *y);
3358
3359     return WINED3D_OK;
3360 }
3361
3362 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3363         DWORD flags, struct wined3d_surface *ref)
3364 {
3365     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3366
3367     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3368     {
3369         TRACE("Not an overlay surface.\n");
3370         return WINEDDERR_NOTAOVERLAYSURFACE;
3371     }
3372
3373     return WINED3D_OK;
3374 }
3375
3376 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3377         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3378 {
3379     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3380             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3381
3382     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3383     {
3384         WARN("Not an overlay surface.\n");
3385         return WINEDDERR_NOTAOVERLAYSURFACE;
3386     }
3387     else if (!dst_surface)
3388     {
3389         WARN("Dest surface is NULL.\n");
3390         return WINED3DERR_INVALIDCALL;
3391     }
3392
3393     if (src_rect)
3394     {
3395         surface->overlay_srcrect = *src_rect;
3396     }
3397     else
3398     {
3399         surface->overlay_srcrect.left = 0;
3400         surface->overlay_srcrect.top = 0;
3401         surface->overlay_srcrect.right = surface->resource.width;
3402         surface->overlay_srcrect.bottom = surface->resource.height;
3403     }
3404
3405     if (dst_rect)
3406     {
3407         surface->overlay_destrect = *dst_rect;
3408     }
3409     else
3410     {
3411         surface->overlay_destrect.left = 0;
3412         surface->overlay_destrect.top = 0;
3413         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3414         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3415     }
3416
3417     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3418     {
3419         surface->overlay_dest = NULL;
3420         list_remove(&surface->overlay_entry);
3421     }
3422
3423     if (flags & WINEDDOVER_SHOW)
3424     {
3425         if (surface->overlay_dest != dst_surface)
3426         {
3427             surface->overlay_dest = dst_surface;
3428             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3429         }
3430     }
3431     else if (flags & WINEDDOVER_HIDE)
3432     {
3433         /* tests show that the rectangles are erased on hide */
3434         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3435         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3436         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3437         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3438         surface->overlay_dest = NULL;
3439     }
3440
3441     surface_draw_overlay(surface);
3442
3443     return WINED3D_OK;
3444 }
3445
3446 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3447         UINT width, UINT height, enum wined3d_format_id format_id,
3448         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3449 {
3450     struct wined3d_device *device = surface->resource.device;
3451     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3452     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3453     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3454
3455     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3456             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3457
3458     if (!resource_size)
3459         return WINED3DERR_INVALIDCALL;
3460
3461     if (device->d3d_initialized)
3462         surface->resource.resource_ops->resource_unload(&surface->resource);
3463
3464     if (surface->flags & SFLAG_DIBSECTION)
3465     {
3466         DeleteDC(surface->hDC);
3467         DeleteObject(surface->dib.DIBsection);
3468         surface->dib.bitmap_data = NULL;
3469         surface->flags &= ~SFLAG_DIBSECTION;
3470     }
3471
3472     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3473     surface->resource.allocatedMemory = NULL;
3474     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3475     surface->resource.heapMemory = NULL;
3476
3477     surface->resource.width = width;
3478     surface->resource.height = height;
3479     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3480             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3481     {
3482         surface->pow2Width = width;
3483         surface->pow2Height = height;
3484     }
3485     else
3486     {
3487         surface->pow2Width = surface->pow2Height = 1;
3488         while (surface->pow2Width < width)
3489             surface->pow2Width <<= 1;
3490         while (surface->pow2Height < height)
3491             surface->pow2Height <<= 1;
3492     }
3493
3494     if (surface->pow2Width != width || surface->pow2Height != height)
3495         surface->flags |= SFLAG_NONPOW2;
3496     else
3497         surface->flags &= ~SFLAG_NONPOW2;
3498
3499     surface->resource.format = format;
3500     surface->resource.multisample_type = multisample_type;
3501     surface->resource.multisample_quality = multisample_quality;
3502     surface->resource.size = resource_size;
3503
3504     if (!surface_init_sysmem(surface))
3505         return E_OUTOFMEMORY;
3506
3507     return WINED3D_OK;
3508 }
3509
3510 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3511         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3512 {
3513     unsigned short *dst_s;
3514     const float *src_f;
3515     unsigned int x, y;
3516
3517     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3518
3519     for (y = 0; y < h; ++y)
3520     {
3521         src_f = (const float *)(src + y * pitch_in);
3522         dst_s = (unsigned short *) (dst + y * pitch_out);
3523         for (x = 0; x < w; ++x)
3524         {
3525             dst_s[x] = float_32_to_16(src_f + x);
3526         }
3527     }
3528 }
3529
3530 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3531         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3532 {
3533     static const unsigned char convert_5to8[] =
3534     {
3535         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3536         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3537         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3538         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3539     };
3540     static const unsigned char convert_6to8[] =
3541     {
3542         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3543         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3544         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3545         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3546         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3547         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3548         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3549         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3550     };
3551     unsigned int x, y;
3552
3553     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3554
3555     for (y = 0; y < h; ++y)
3556     {
3557         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3558         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3559         for (x = 0; x < w; ++x)
3560         {
3561             WORD pixel = src_line[x];
3562             dst_line[x] = 0xff000000
3563                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3564                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3565                     | convert_5to8[(pixel & 0x001f)];
3566         }
3567     }
3568 }
3569
3570 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3571  * in both cases we're just setting the X / Alpha channel to 0xff. */
3572 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3573         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3574 {
3575     unsigned int x, y;
3576
3577     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3578
3579     for (y = 0; y < h; ++y)
3580     {
3581         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3582         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3583
3584         for (x = 0; x < w; ++x)
3585         {
3586             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3587         }
3588     }
3589 }
3590
3591 static inline BYTE cliptobyte(int x)
3592 {
3593     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3594 }
3595
3596 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3597         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3598 {
3599     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3600     unsigned int x, y;
3601
3602     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3603
3604     for (y = 0; y < h; ++y)
3605     {
3606         const BYTE *src_line = src + y * pitch_in;
3607         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3608         for (x = 0; x < w; ++x)
3609         {
3610             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3611              *     C = Y - 16; D = U - 128; E = V - 128;
3612              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3613              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3614              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3615              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3616              * U and V are shared between the pixels. */
3617             if (!(x & 1)) /* For every even pixel, read new U and V. */
3618             {
3619                 d = (int) src_line[1] - 128;
3620                 e = (int) src_line[3] - 128;
3621                 r2 = 409 * e + 128;
3622                 g2 = - 100 * d - 208 * e + 128;
3623                 b2 = 516 * d + 128;
3624             }
3625             c2 = 298 * ((int) src_line[0] - 16);
3626             dst_line[x] = 0xff000000
3627                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3628                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3629                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3630                 /* Scale RGB values to 0..255 range,
3631                  * then clip them if still not in range (may be negative),
3632                  * then shift them within DWORD if necessary. */
3633             src_line += 2;
3634         }
3635     }
3636 }
3637
3638 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3639         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3640 {
3641     unsigned int x, y;
3642     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3643
3644     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3645
3646     for (y = 0; y < h; ++y)
3647     {
3648         const BYTE *src_line = src + y * pitch_in;
3649         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3650         for (x = 0; x < w; ++x)
3651         {
3652             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3653              *     C = Y - 16; D = U - 128; E = V - 128;
3654              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3655              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3656              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3657              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3658              * U and V are shared between the pixels. */
3659             if (!(x & 1)) /* For every even pixel, read new U and V. */
3660             {
3661                 d = (int) src_line[1] - 128;
3662                 e = (int) src_line[3] - 128;
3663                 r2 = 409 * e + 128;
3664                 g2 = - 100 * d - 208 * e + 128;
3665                 b2 = 516 * d + 128;
3666             }
3667             c2 = 298 * ((int) src_line[0] - 16);
3668             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3669                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3670                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3671                 /* Scale RGB values to 0..255 range,
3672                  * then clip them if still not in range (may be negative),
3673                  * then shift them within DWORD if necessary. */
3674             src_line += 2;
3675         }
3676     }
3677 }
3678
3679 struct d3dfmt_convertor_desc
3680 {
3681     enum wined3d_format_id from, to;
3682     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3683 };
3684
3685 static const struct d3dfmt_convertor_desc convertors[] =
3686 {
3687     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3688     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3689     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3690     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3691     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3692     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3693 };
3694
3695 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3696         enum wined3d_format_id to)
3697 {
3698     unsigned int i;
3699
3700     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3701     {
3702         if (convertors[i].from == from && convertors[i].to == to)
3703             return &convertors[i];
3704     }
3705
3706     return NULL;
3707 }
3708
3709 /*****************************************************************************
3710  * surface_convert_format
3711  *
3712  * Creates a duplicate of a surface in a different format. Is used by Blt to
3713  * blit between surfaces with different formats.
3714  *
3715  * Parameters
3716  *  source: Source surface
3717  *  fmt: Requested destination format
3718  *
3719  *****************************************************************************/
3720 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3721 {
3722     struct wined3d_map_desc src_map, dst_map;
3723     const struct d3dfmt_convertor_desc *conv;
3724     struct wined3d_surface *ret = NULL;
3725     HRESULT hr;
3726
3727     conv = find_convertor(source->resource.format->id, to_fmt);
3728     if (!conv)
3729     {
3730         FIXME("Cannot find a conversion function from format %s to %s.\n",
3731                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3732         return NULL;
3733     }
3734
3735     wined3d_surface_create(source->resource.device, source->resource.width,
3736             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3D_POOL_SCRATCH,
3737             WINED3D_MULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3738             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3739             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3740     if (!ret)
3741     {
3742         ERR("Failed to create a destination surface for conversion.\n");
3743         return NULL;
3744     }
3745
3746     memset(&src_map, 0, sizeof(src_map));
3747     memset(&dst_map, 0, sizeof(dst_map));
3748
3749     if (FAILED(hr = wined3d_surface_map(source, &src_map, NULL, WINED3D_MAP_READONLY)))
3750     {
3751         ERR("Failed to lock the source surface.\n");
3752         wined3d_surface_decref(ret);
3753         return NULL;
3754     }
3755     if (FAILED(hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3D_MAP_READONLY)))
3756     {
3757         ERR("Failed to lock the destination surface.\n");
3758         wined3d_surface_unmap(source);
3759         wined3d_surface_decref(ret);
3760         return NULL;
3761     }
3762
3763     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3764             source->resource.width, source->resource.height);
3765
3766     wined3d_surface_unmap(ret);
3767     wined3d_surface_unmap(source);
3768
3769     return ret;
3770 }
3771
3772 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3773         unsigned int bpp, UINT pitch, DWORD color)
3774 {
3775     BYTE *first;
3776     int x, y;
3777
3778     /* Do first row */
3779
3780 #define COLORFILL_ROW(type) \
3781 do { \
3782     type *d = (type *)buf; \
3783     for (x = 0; x < width; ++x) \
3784         d[x] = (type)color; \
3785 } while(0)
3786
3787     switch (bpp)
3788     {
3789         case 1:
3790             COLORFILL_ROW(BYTE);
3791             break;
3792
3793         case 2:
3794             COLORFILL_ROW(WORD);
3795             break;
3796
3797         case 3:
3798         {
3799             BYTE *d = buf;
3800             for (x = 0; x < width; ++x, d += 3)
3801             {
3802                 d[0] = (color      ) & 0xFF;
3803                 d[1] = (color >>  8) & 0xFF;
3804                 d[2] = (color >> 16) & 0xFF;
3805             }
3806             break;
3807         }
3808         case 4:
3809             COLORFILL_ROW(DWORD);
3810             break;
3811
3812         default:
3813             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3814             return WINED3DERR_NOTAVAILABLE;
3815     }
3816
3817 #undef COLORFILL_ROW
3818
3819     /* Now copy first row. */
3820     first = buf;
3821     for (y = 1; y < height; ++y)
3822     {
3823         buf += pitch;
3824         memcpy(buf, first, width * bpp);
3825     }
3826
3827     return WINED3D_OK;
3828 }
3829
3830 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3831 {
3832     TRACE("surface %p.\n", surface);
3833
3834     if (!surface->resource.map_count)
3835     {
3836         WARN("Trying to unmap unmapped surface.\n");
3837         return WINEDDERR_NOTLOCKED;
3838     }
3839     --surface->resource.map_count;
3840
3841     surface->surface_ops->surface_unmap(surface);
3842
3843     return WINED3D_OK;
3844 }
3845
3846 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3847         struct wined3d_map_desc *map_desc, const RECT *rect, DWORD flags)
3848 {
3849     const struct wined3d_format *format = surface->resource.format;
3850
3851     TRACE("surface %p, map_desc %p, rect %s, flags %#x.\n",
3852             surface, map_desc, wine_dbgstr_rect(rect), flags);
3853
3854     if (surface->resource.map_count)
3855     {
3856         WARN("Surface is already mapped.\n");
3857         return WINED3DERR_INVALIDCALL;
3858     }
3859     if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
3860             && rect && (rect->left || rect->top
3861             || rect->right != surface->resource.width
3862             || rect->bottom != surface->resource.height))
3863     {
3864         UINT width_mask = format->block_width - 1;
3865         UINT height_mask = format->block_height - 1;
3866
3867         if ((rect->left & width_mask) || (rect->right & width_mask)
3868                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3869         {
3870             WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3871                     wine_dbgstr_rect(rect), format->block_width, format->block_height);
3872
3873             if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3874                 return WINED3DERR_INVALIDCALL;
3875         }
3876     }
3877
3878     ++surface->resource.map_count;
3879
3880     if (!(surface->flags & SFLAG_LOCKABLE))
3881         WARN("Trying to lock unlockable surface.\n");
3882
3883     /* Performance optimization: Count how often a surface is mapped, if it is
3884      * mapped regularly do not throw away the system memory copy. This avoids
3885      * the need to download the surface from OpenGL all the time. The surface
3886      * is still downloaded if the OpenGL texture is changed. */
3887     if (!(surface->flags & SFLAG_DYNLOCK))
3888     {
3889         if (++surface->lockCount > MAXLOCKCOUNT)
3890         {
3891             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3892             surface->flags |= SFLAG_DYNLOCK;
3893         }
3894     }
3895
3896     surface->surface_ops->surface_map(surface, rect, flags);
3897
3898     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3899         map_desc->row_pitch = surface->resource.width * format->byte_count;
3900     else
3901         map_desc->row_pitch = wined3d_surface_get_pitch(surface);
3902     map_desc->slice_pitch = 0;
3903
3904     if (!rect)
3905     {
3906         map_desc->data = surface->resource.allocatedMemory;
3907         surface->lockedRect.left = 0;
3908         surface->lockedRect.top = 0;
3909         surface->lockedRect.right = surface->resource.width;
3910         surface->lockedRect.bottom = surface->resource.height;
3911     }
3912     else
3913     {
3914         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3915         {
3916             /* Compressed textures are block based, so calculate the offset of
3917              * the block that contains the top-left pixel of the locked rectangle. */
3918             map_desc->data = surface->resource.allocatedMemory
3919                     + ((rect->top / format->block_height) * map_desc->row_pitch)
3920                     + ((rect->left / format->block_width) * format->block_byte_count);
3921         }
3922         else
3923         {
3924             map_desc->data = surface->resource.allocatedMemory
3925                     + (map_desc->row_pitch * rect->top)
3926                     + (rect->left * format->byte_count);
3927         }
3928         surface->lockedRect.left = rect->left;
3929         surface->lockedRect.top = rect->top;
3930         surface->lockedRect.right = rect->right;
3931         surface->lockedRect.bottom = rect->bottom;
3932     }
3933
3934     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3935     TRACE("Returning memory %p, pitch %u.\n", map_desc->data, map_desc->row_pitch);
3936
3937     return WINED3D_OK;
3938 }
3939
3940 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3941 {
3942     struct wined3d_map_desc map;
3943     HRESULT hr;
3944
3945     TRACE("surface %p, dc %p.\n", surface, dc);
3946
3947     if (surface->flags & SFLAG_USERPTR)
3948     {
3949         ERR("Not supported on surfaces with application-provided memory.\n");
3950         return WINEDDERR_NODC;
3951     }
3952
3953     /* Give more detailed info for ddraw. */
3954     if (surface->flags & SFLAG_DCINUSE)
3955         return WINEDDERR_DCALREADYCREATED;
3956
3957     /* Can't GetDC if the surface is locked. */
3958     if (surface->resource.map_count)
3959         return WINED3DERR_INVALIDCALL;
3960
3961     /* Create a DIB section if there isn't a dc yet. */
3962     if (!surface->hDC)
3963     {
3964         if (surface->flags & SFLAG_CLIENT)
3965         {
3966             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3967             surface_release_client_storage(surface);
3968         }
3969         hr = surface_create_dib_section(surface);
3970         if (FAILED(hr))
3971             return WINED3DERR_INVALIDCALL;
3972
3973         /* Use the DIB section from now on if we are not using a PBO. */
3974         if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
3975         {
3976             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3977             surface->resource.heapMemory = NULL;
3978             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3979         }
3980     }
3981
3982     /* Map the surface. */
3983     hr = wined3d_surface_map(surface, &map, NULL, 0);
3984     if (FAILED(hr))
3985     {
3986         ERR("Map failed, hr %#x.\n", hr);
3987         return hr;
3988     }
3989
3990     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3991      * activates the allocatedMemory. */
3992     if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
3993         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3994
3995     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3996             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3997     {
3998         /* GetDC on palettized formats is unsupported in D3D9, and the method
3999          * is missing in D3D8, so this should only be used for DX <=7
4000          * surfaces (with non-device palettes). */
4001         const PALETTEENTRY *pal = NULL;
4002
4003         if (surface->palette)
4004         {
4005             pal = surface->palette->palents;
4006         }
4007         else
4008         {
4009             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
4010             struct wined3d_surface *dds_primary = swapchain->front_buffer;
4011
4012             if (dds_primary && dds_primary->palette)
4013                 pal = dds_primary->palette->palents;
4014         }
4015
4016         if (pal)
4017         {
4018             RGBQUAD col[256];
4019             unsigned int i;
4020
4021             for (i = 0; i < 256; ++i)
4022             {
4023                 col[i].rgbRed = pal[i].peRed;
4024                 col[i].rgbGreen = pal[i].peGreen;
4025                 col[i].rgbBlue = pal[i].peBlue;
4026                 col[i].rgbReserved = 0;
4027             }
4028             SetDIBColorTable(surface->hDC, 0, 256, col);
4029         }
4030     }
4031
4032     surface->flags |= SFLAG_DCINUSE;
4033
4034     *dc = surface->hDC;
4035     TRACE("Returning dc %p.\n", *dc);
4036
4037     return WINED3D_OK;
4038 }
4039
4040 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
4041 {
4042     TRACE("surface %p, dc %p.\n", surface, dc);
4043
4044     if (!(surface->flags & SFLAG_DCINUSE))
4045         return WINEDDERR_NODC;
4046
4047     if (surface->hDC != dc)
4048     {
4049         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
4050                 dc, surface->hDC);
4051         return WINEDDERR_NODC;
4052     }
4053
4054     /* Copy the contents of the DIB over to the PBO. */
4055     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
4056         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
4057
4058     /* We locked first, so unlock now. */
4059     wined3d_surface_unmap(surface);
4060
4061     surface->flags &= ~SFLAG_DCINUSE;
4062
4063     return WINED3D_OK;
4064 }
4065
4066 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
4067 {
4068     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
4069
4070     if (flags)
4071     {
4072         static UINT once;
4073         if (!once++)
4074             FIXME("Ignoring flags %#x.\n", flags);
4075         else
4076             WARN("Ignoring flags %#x.\n", flags);
4077     }
4078
4079     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4080     {
4081         ERR("Not supported on swapchain surfaces.\n");
4082         return WINEDDERR_NOTFLIPPABLE;
4083     }
4084
4085     /* Flipping is only supported on render targets and overlays. */
4086     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
4087     {
4088         WARN("Tried to flip a non-render target, non-overlay surface.\n");
4089         return WINEDDERR_NOTFLIPPABLE;
4090     }
4091
4092     flip_surface(surface, override);
4093
4094     /* Update overlays if they're visible. */
4095     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
4096         return surface_draw_overlay(surface);
4097
4098     return WINED3D_OK;
4099 }
4100
4101 /* Do not call while under the GL lock. */
4102 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
4103 {
4104     struct wined3d_device *device = surface->resource.device;
4105
4106     TRACE("iface %p, srgb %#x.\n", surface, srgb);
4107
4108     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4109     {
4110         struct wined3d_texture *texture = surface->container.u.texture;
4111
4112         TRACE("Passing to container (%p).\n", texture);
4113         texture->texture_ops->texture_preload(texture, srgb);
4114     }
4115     else
4116     {
4117         struct wined3d_context *context;
4118
4119         TRACE("(%p) : About to load surface\n", surface);
4120
4121         /* TODO: Use already acquired context when possible. */
4122         context = context_acquire(device, NULL);
4123
4124         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
4125
4126         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
4127         {
4128             /* Tell opengl to try and keep this texture in video ram (well mostly) */
4129             GLclampf tmp;
4130             tmp = 0.9f;
4131             ENTER_GL();
4132             glPrioritizeTextures(1, &surface->texture_name, &tmp);
4133             LEAVE_GL();
4134         }
4135
4136         context_release(context);
4137     }
4138 }
4139
4140 /* Read the framebuffer back into the surface */
4141 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4142 {
4143     struct wined3d_device *device = surface->resource.device;
4144     const struct wined3d_gl_info *gl_info;
4145     struct wined3d_context *context;
4146     BYTE *mem;
4147     GLint fmt;
4148     GLint type;
4149     BYTE *row, *top, *bottom;
4150     int i;
4151     BOOL bpp;
4152     RECT local_rect;
4153     BOOL srcIsUpsideDown;
4154     GLint rowLen = 0;
4155     GLint skipPix = 0;
4156     GLint skipRow = 0;
4157
4158     context = context_acquire(device, surface);
4159     context_apply_blit_state(context, device);
4160     gl_info = context->gl_info;
4161
4162     ENTER_GL();
4163
4164     /* Select the correct read buffer, and give some debug output.
4165      * There is no need to keep track of the current read buffer or reset it, every part of the code
4166      * that reads sets the read buffer as desired.
4167      */
4168     if (surface_is_offscreen(surface))
4169     {
4170         /* Mapping the primary render target which is not on a swapchain.
4171          * Read from the back buffer. */
4172         TRACE("Mapping offscreen render target.\n");
4173         glReadBuffer(device->offscreenBuffer);
4174         srcIsUpsideDown = TRUE;
4175     }
4176     else
4177     {
4178         /* Onscreen surfaces are always part of a swapchain */
4179         GLenum buffer = surface_get_gl_buffer(surface);
4180         TRACE("Mapping %#x buffer.\n", buffer);
4181         glReadBuffer(buffer);
4182         checkGLcall("glReadBuffer");
4183         srcIsUpsideDown = FALSE;
4184     }
4185
4186     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4187     if (!rect)
4188     {
4189         local_rect.left = 0;
4190         local_rect.top = 0;
4191         local_rect.right = surface->resource.width;
4192         local_rect.bottom = surface->resource.height;
4193     }
4194     else
4195     {
4196         local_rect = *rect;
4197     }
4198     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4199
4200     switch (surface->resource.format->id)
4201     {
4202         case WINED3DFMT_P8_UINT:
4203         {
4204             if (primary_render_target_is_p8(device))
4205             {
4206                 /* In case of P8 render targets the index is stored in the alpha component */
4207                 fmt = GL_ALPHA;
4208                 type = GL_UNSIGNED_BYTE;
4209                 mem = dest;
4210                 bpp = surface->resource.format->byte_count;
4211             }
4212             else
4213             {
4214                 /* GL can't return palettized data, so read ARGB pixels into a
4215                  * separate block of memory and convert them into palettized format
4216                  * in software. Slow, but if the app means to use palettized render
4217                  * targets and locks it...
4218                  *
4219                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4220                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4221                  * for the color channels when palettizing the colors.
4222                  */
4223                 fmt = GL_RGB;
4224                 type = GL_UNSIGNED_BYTE;
4225                 pitch *= 3;
4226                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4227                 if (!mem)
4228                 {
4229                     ERR("Out of memory\n");
4230                     LEAVE_GL();
4231                     return;
4232                 }
4233                 bpp = surface->resource.format->byte_count * 3;
4234             }
4235         }
4236         break;
4237
4238         default:
4239             mem = dest;
4240             fmt = surface->resource.format->glFormat;
4241             type = surface->resource.format->glType;
4242             bpp = surface->resource.format->byte_count;
4243     }
4244
4245     if (surface->flags & SFLAG_PBO)
4246     {
4247         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4248         checkGLcall("glBindBufferARB");
4249         if (mem)
4250         {
4251             ERR("mem not null for pbo -- unexpected\n");
4252             mem = NULL;
4253         }
4254     }
4255
4256     /* Save old pixel store pack state */
4257     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4258     checkGLcall("glGetIntegerv");
4259     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4260     checkGLcall("glGetIntegerv");
4261     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4262     checkGLcall("glGetIntegerv");
4263
4264     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4265     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4266     checkGLcall("glPixelStorei");
4267     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4268     checkGLcall("glPixelStorei");
4269     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4270     checkGLcall("glPixelStorei");
4271
4272     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4273             local_rect.right - local_rect.left,
4274             local_rect.bottom - local_rect.top,
4275             fmt, type, mem);
4276     checkGLcall("glReadPixels");
4277
4278     /* Reset previous pixel store pack state */
4279     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4280     checkGLcall("glPixelStorei");
4281     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4282     checkGLcall("glPixelStorei");
4283     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4284     checkGLcall("glPixelStorei");
4285
4286     if (surface->flags & SFLAG_PBO)
4287     {
4288         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4289         checkGLcall("glBindBufferARB");
4290
4291         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4292          * to get a pointer to it and perform the flipping in software. This is a lot
4293          * faster than calling glReadPixels for each line. In case we want more speed
4294          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4295         if (!srcIsUpsideDown)
4296         {
4297             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4298             checkGLcall("glBindBufferARB");
4299
4300             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4301             checkGLcall("glMapBufferARB");
4302         }
4303     }
4304
4305     /* TODO: Merge this with the palettization loop below for P8 targets */
4306     if(!srcIsUpsideDown) {
4307         UINT len, off;
4308         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4309             Flip the lines in software */
4310         len = (local_rect.right - local_rect.left) * bpp;
4311         off = local_rect.left * bpp;
4312
4313         row = HeapAlloc(GetProcessHeap(), 0, len);
4314         if(!row) {
4315             ERR("Out of memory\n");
4316             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4317                 HeapFree(GetProcessHeap(), 0, mem);
4318             LEAVE_GL();
4319             return;
4320         }
4321
4322         top = mem + pitch * local_rect.top;
4323         bottom = mem + pitch * (local_rect.bottom - 1);
4324         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4325             memcpy(row, top + off, len);
4326             memcpy(top + off, bottom + off, len);
4327             memcpy(bottom + off, row, len);
4328             top += pitch;
4329             bottom -= pitch;
4330         }
4331         HeapFree(GetProcessHeap(), 0, row);
4332
4333         /* Unmap the temp PBO buffer */
4334         if (surface->flags & SFLAG_PBO)
4335         {
4336             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4337             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4338         }
4339     }
4340
4341     LEAVE_GL();
4342     context_release(context);
4343
4344     /* For P8 textures we need to perform an inverse palette lookup. This is
4345      * done by searching for a palette index which matches the RGB value.
4346      * Note this isn't guaranteed to work when there are multiple entries for
4347      * the same color but we have no choice. In case of P8 render targets,
4348      * the index is stored in the alpha component so no conversion is needed. */
4349     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4350     {
4351         const PALETTEENTRY *pal = NULL;
4352         DWORD width = pitch / 3;
4353         int x, y, c;
4354
4355         if (surface->palette)
4356         {
4357             pal = surface->palette->palents;
4358         }
4359         else
4360         {
4361             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4362             HeapFree(GetProcessHeap(), 0, mem);
4363             return;
4364         }
4365
4366         for(y = local_rect.top; y < local_rect.bottom; y++) {
4367             for(x = local_rect.left; x < local_rect.right; x++) {
4368                 /*                      start              lines            pixels      */
4369                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4370                 const BYTE *green = blue  + 1;
4371                 const BYTE *red = green + 1;
4372
4373                 for(c = 0; c < 256; c++) {
4374                     if(*red   == pal[c].peRed   &&
4375                        *green == pal[c].peGreen &&
4376                        *blue  == pal[c].peBlue)
4377                     {
4378                         *((BYTE *) dest + y * width + x) = c;
4379                         break;
4380                     }
4381                 }
4382             }
4383         }
4384         HeapFree(GetProcessHeap(), 0, mem);
4385     }
4386 }
4387
4388 /* Read the framebuffer contents into a texture. Note that this function
4389  * doesn't do any kind of flipping. Using this on an onscreen surface will
4390  * result in a flipped D3D texture. */
4391 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4392 {
4393     struct wined3d_device *device = surface->resource.device;
4394     struct wined3d_context *context;
4395
4396     context = context_acquire(device, surface);
4397     device_invalidate_state(device, STATE_FRAMEBUFFER);
4398
4399     surface_prepare_texture(surface, context, srgb);
4400     surface_bind_and_dirtify(surface, context, srgb);
4401
4402     TRACE("Reading back offscreen render target %p.\n", surface);
4403
4404     ENTER_GL();
4405
4406     if (surface_is_offscreen(surface))
4407         glReadBuffer(device->offscreenBuffer);
4408     else
4409         glReadBuffer(surface_get_gl_buffer(surface));
4410     checkGLcall("glReadBuffer");
4411
4412     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4413             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4414     checkGLcall("glCopyTexSubImage2D");
4415
4416     LEAVE_GL();
4417
4418     context_release(context);
4419 }
4420
4421 /* Context activation is done by the caller. */
4422 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4423         struct wined3d_context *context, BOOL srgb)
4424 {
4425     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4426     enum wined3d_conversion_type convert;
4427     struct wined3d_format format;
4428
4429     if (surface->flags & alloc_flag) return;
4430
4431     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4432     if (convert != WINED3D_CT_NONE || format.convert)
4433         surface->flags |= SFLAG_CONVERTED;
4434     else surface->flags &= ~SFLAG_CONVERTED;
4435
4436     surface_bind_and_dirtify(surface, context, srgb);
4437     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4438     surface->flags |= alloc_flag;
4439 }
4440
4441 /* Context activation is done by the caller. */
4442 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4443 {
4444     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4445     {
4446         struct wined3d_texture *texture = surface->container.u.texture;
4447         UINT sub_count = texture->level_count * texture->layer_count;
4448         UINT i;
4449
4450         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4451
4452         for (i = 0; i < sub_count; ++i)
4453         {
4454             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4455             surface_prepare_texture_internal(s, context, srgb);
4456         }
4457
4458         return;
4459     }
4460
4461     surface_prepare_texture_internal(surface, context, srgb);
4462 }
4463
4464 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4465 {
4466     if (multisample)
4467     {
4468         if (surface->rb_multisample)
4469             return;
4470
4471         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4472         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4473         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4474                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4475         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4476     }
4477     else
4478     {
4479         if (surface->rb_resolved)
4480             return;
4481
4482         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4483         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4484         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4485                 surface->pow2Width, surface->pow2Height);
4486         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4487     }
4488 }
4489
4490 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4491         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4492 {
4493     struct wined3d_device *device = surface->resource.device;
4494     UINT pitch = wined3d_surface_get_pitch(surface);
4495     const struct wined3d_gl_info *gl_info;
4496     struct wined3d_context *context;
4497     RECT local_rect;
4498     UINT w, h;
4499
4500     surface_get_rect(surface, rect, &local_rect);
4501
4502     mem += local_rect.top * pitch + local_rect.left * bpp;
4503     w = local_rect.right - local_rect.left;
4504     h = local_rect.bottom - local_rect.top;
4505
4506     /* Activate the correct context for the render target */
4507     context = context_acquire(device, surface);
4508     context_apply_blit_state(context, device);
4509     gl_info = context->gl_info;
4510
4511     ENTER_GL();
4512
4513     if (!surface_is_offscreen(surface))
4514     {
4515         GLenum buffer = surface_get_gl_buffer(surface);
4516         TRACE("Unlocking %#x buffer.\n", buffer);
4517         context_set_draw_buffer(context, buffer);
4518
4519         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4520         glPixelZoom(1.0f, -1.0f);
4521     }
4522     else
4523     {
4524         /* Primary offscreen render target */
4525         TRACE("Offscreen render target.\n");
4526         context_set_draw_buffer(context, device->offscreenBuffer);
4527
4528         glPixelZoom(1.0f, 1.0f);
4529     }
4530
4531     glRasterPos3i(local_rect.left, local_rect.top, 1);
4532     checkGLcall("glRasterPos3i");
4533
4534     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4535     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4536
4537     if (surface->flags & SFLAG_PBO)
4538     {
4539         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4540         checkGLcall("glBindBufferARB");
4541     }
4542
4543     glDrawPixels(w, h, fmt, type, mem);
4544     checkGLcall("glDrawPixels");
4545
4546     if (surface->flags & SFLAG_PBO)
4547     {
4548         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4549         checkGLcall("glBindBufferARB");
4550     }
4551
4552     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4553     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4554
4555     LEAVE_GL();
4556
4557     if (wined3d_settings.strict_draw_ordering
4558             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4559             && surface->container.u.swapchain->front_buffer == surface))
4560         wglFlush();
4561
4562     context_release(context);
4563 }
4564
4565 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4566 {
4567     /* FIXME: Is this really how color keys are supposed to work? I think it
4568      * makes more sense to compare the individual channels. */
4569     return color >= color_key->color_space_low_value
4570             && color <= color_key->color_space_high_value;
4571 }
4572
4573 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4574 {
4575     const struct wined3d_device *device = surface->resource.device;
4576     const struct wined3d_palette *pal = surface->palette;
4577     BOOL index_in_alpha = FALSE;
4578     unsigned int i;
4579
4580     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4581      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4582      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4583      * duplicate entries. Store the color key in the unused alpha component to speed the
4584      * download up and to make conversion unneeded. */
4585     index_in_alpha = primary_render_target_is_p8(device);
4586
4587     if (!pal)
4588     {
4589         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4590         if (index_in_alpha)
4591         {
4592             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4593              * there's no palette at this time. */
4594             for (i = 0; i < 256; i++) table[i][3] = i;
4595         }
4596     }
4597     else
4598     {
4599         TRACE("Using surface palette %p\n", pal);
4600         /* Get the surface's palette */
4601         for (i = 0; i < 256; ++i)
4602         {
4603             table[i][0] = pal->palents[i].peRed;
4604             table[i][1] = pal->palents[i].peGreen;
4605             table[i][2] = pal->palents[i].peBlue;
4606
4607             /* When index_in_alpha is set the palette index is stored in the
4608              * alpha component. In case of a readback we can then read
4609              * GL_ALPHA. Color keying is handled in BltOverride using a
4610              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4611              * color key itself is passed to glAlphaFunc in other cases the
4612              * alpha component of pixels that should be masked away is set to 0. */
4613             if (index_in_alpha)
4614                 table[i][3] = i;
4615             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4616                 table[i][3] = 0x00;
4617             else if (pal->flags & WINEDDPCAPS_ALPHA)
4618                 table[i][3] = pal->palents[i].peFlags;
4619             else
4620                 table[i][3] = 0xFF;
4621         }
4622     }
4623 }
4624
4625 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width, UINT height,
4626         UINT outpitch, enum wined3d_conversion_type conversion_type, struct wined3d_surface *surface)
4627 {
4628     const BYTE *source;
4629     BYTE *dest;
4630
4631     TRACE("src %p, dst %p, pitch %u, width %u, height %u, outpitch %u, conversion_type %#x, surface %p.\n",
4632             src, dst, pitch, width, height, outpitch, conversion_type, surface);
4633
4634     switch (conversion_type)
4635     {
4636         case WINED3D_CT_NONE:
4637         {
4638             memcpy(dst, src, pitch * height);
4639             break;
4640         }
4641
4642         case WINED3D_CT_PALETTED:
4643         case WINED3D_CT_PALETTED_CK:
4644         {
4645             BYTE table[256][4];
4646             unsigned int x, y;
4647
4648             d3dfmt_p8_init_palette(surface, table, (conversion_type == WINED3D_CT_PALETTED_CK));
4649
4650             for (y = 0; y < height; y++)
4651             {
4652                 source = src + pitch * y;
4653                 dest = dst + outpitch * y;
4654                 /* This is an 1 bpp format, using the width here is fine */
4655                 for (x = 0; x < width; x++) {
4656                     BYTE color = *source++;
4657                     *dest++ = table[color][0];
4658                     *dest++ = table[color][1];
4659                     *dest++ = table[color][2];
4660                     *dest++ = table[color][3];
4661                 }
4662             }
4663         }
4664         break;
4665
4666         case WINED3D_CT_CK_565:
4667         {
4668             /* Converting the 565 format in 5551 packed to emulate color-keying.
4669
4670               Note : in all these conversion, it would be best to average the averaging
4671                       pixels to get the color of the pixel that will be color-keyed to
4672                       prevent 'color bleeding'. This will be done later on if ever it is
4673                       too visible.
4674
4675               Note2: Nvidia documents say that their driver does not support alpha + color keying
4676                      on the same surface and disables color keying in such a case
4677             */
4678             unsigned int x, y;
4679             const WORD *Source;
4680             WORD *Dest;
4681
4682             TRACE("Color keyed 565\n");
4683
4684             for (y = 0; y < height; y++) {
4685                 Source = (const WORD *)(src + y * pitch);
4686                 Dest = (WORD *) (dst + y * outpitch);
4687                 for (x = 0; x < width; x++ ) {
4688                     WORD color = *Source++;
4689                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4690                     if (!color_in_range(&surface->src_blt_color_key, color))
4691                         *Dest |= 0x0001;
4692                     Dest++;
4693                 }
4694             }
4695         }
4696         break;
4697
4698         case WINED3D_CT_CK_5551:
4699         {
4700             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4701             unsigned int x, y;
4702             const WORD *Source;
4703             WORD *Dest;
4704             TRACE("Color keyed 5551\n");
4705             for (y = 0; y < height; y++) {
4706                 Source = (const WORD *)(src + y * pitch);
4707                 Dest = (WORD *) (dst + y * outpitch);
4708                 for (x = 0; x < width; x++ ) {
4709                     WORD color = *Source++;
4710                     *Dest = color;
4711                     if (!color_in_range(&surface->src_blt_color_key, color))
4712                         *Dest |= (1 << 15);
4713                     else
4714                         *Dest &= ~(1 << 15);
4715                     Dest++;
4716                 }
4717             }
4718         }
4719         break;
4720
4721         case WINED3D_CT_CK_RGB24:
4722         {
4723             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4724             unsigned int x, y;
4725             for (y = 0; y < height; y++)
4726             {
4727                 source = src + pitch * y;
4728                 dest = dst + outpitch * y;
4729                 for (x = 0; x < width; x++) {
4730                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4731                     DWORD dstcolor = color << 8;
4732                     if (!color_in_range(&surface->src_blt_color_key, color))
4733                         dstcolor |= 0xff;
4734                     *(DWORD*)dest = dstcolor;
4735                     source += 3;
4736                     dest += 4;
4737                 }
4738             }
4739         }
4740         break;
4741
4742         case WINED3D_CT_RGB32_888:
4743         {
4744             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4745             unsigned int x, y;
4746             for (y = 0; y < height; y++)
4747             {
4748                 source = src + pitch * y;
4749                 dest = dst + outpitch * y;
4750                 for (x = 0; x < width; x++) {
4751                     DWORD color = 0xffffff & *(const DWORD*)source;
4752                     DWORD dstcolor = color << 8;
4753                     if (!color_in_range(&surface->src_blt_color_key, color))
4754                         dstcolor |= 0xff;
4755                     *(DWORD*)dest = dstcolor;
4756                     source += 4;
4757                     dest += 4;
4758                 }
4759             }
4760         }
4761         break;
4762
4763         case WINED3D_CT_CK_ARGB32:
4764         {
4765             unsigned int x, y;
4766             for (y = 0; y < height; ++y)
4767             {
4768                 source = src + pitch * y;
4769                 dest = dst + outpitch * y;
4770                 for (x = 0; x < width; ++x)
4771                 {
4772                     DWORD color = *(const DWORD *)source;
4773                     if (color_in_range(&surface->src_blt_color_key, color))
4774                         color &= ~0xff000000;
4775                     *(DWORD*)dest = color;
4776                     source += 4;
4777                     dest += 4;
4778                 }
4779             }
4780         }
4781         break;
4782
4783         default:
4784             ERR("Unsupported conversion type %#x.\n", conversion_type);
4785     }
4786     return WINED3D_OK;
4787 }
4788
4789 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4790 {
4791     /* Flip the surface contents */
4792     /* Flip the DC */
4793     {
4794         HDC tmp;
4795         tmp = front->hDC;
4796         front->hDC = back->hDC;
4797         back->hDC = tmp;
4798     }
4799
4800     /* Flip the DIBsection */
4801     {
4802         HBITMAP tmp = front->dib.DIBsection;
4803         front->dib.DIBsection = back->dib.DIBsection;
4804         back->dib.DIBsection = tmp;
4805     }
4806
4807     /* Flip the surface data */
4808     {
4809         void* tmp;
4810
4811         tmp = front->dib.bitmap_data;
4812         front->dib.bitmap_data = back->dib.bitmap_data;
4813         back->dib.bitmap_data = tmp;
4814
4815         tmp = front->resource.allocatedMemory;
4816         front->resource.allocatedMemory = back->resource.allocatedMemory;
4817         back->resource.allocatedMemory = tmp;
4818
4819         tmp = front->resource.heapMemory;
4820         front->resource.heapMemory = back->resource.heapMemory;
4821         back->resource.heapMemory = tmp;
4822     }
4823
4824     /* Flip the PBO */
4825     {
4826         GLuint tmp_pbo = front->pbo;
4827         front->pbo = back->pbo;
4828         back->pbo = tmp_pbo;
4829     }
4830
4831     /* Flip the opengl texture */
4832     {
4833         GLuint tmp;
4834
4835         tmp = back->texture_name;
4836         back->texture_name = front->texture_name;
4837         front->texture_name = tmp;
4838
4839         tmp = back->texture_name_srgb;
4840         back->texture_name_srgb = front->texture_name_srgb;
4841         front->texture_name_srgb = tmp;
4842
4843         tmp = back->rb_multisample;
4844         back->rb_multisample = front->rb_multisample;
4845         front->rb_multisample = tmp;
4846
4847         tmp = back->rb_resolved;
4848         back->rb_resolved = front->rb_resolved;
4849         front->rb_resolved = tmp;
4850
4851         resource_unload(&back->resource);
4852         resource_unload(&front->resource);
4853     }
4854
4855     {
4856         DWORD tmp_flags = back->flags;
4857         back->flags = front->flags;
4858         front->flags = tmp_flags;
4859     }
4860 }
4861
4862 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4863  * pixel copy calls. */
4864 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4865         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4866 {
4867     struct wined3d_device *device = dst_surface->resource.device;
4868     float xrel, yrel;
4869     UINT row;
4870     struct wined3d_context *context;
4871     BOOL upsidedown = FALSE;
4872     RECT dst_rect = *dst_rect_in;
4873
4874     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4875      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4876      */
4877     if(dst_rect.top > dst_rect.bottom) {
4878         UINT tmp = dst_rect.bottom;
4879         dst_rect.bottom = dst_rect.top;
4880         dst_rect.top = tmp;
4881         upsidedown = TRUE;
4882     }
4883
4884     context = context_acquire(device, src_surface);
4885     context_apply_blit_state(context, device);
4886     surface_internal_preload(dst_surface, SRGB_RGB);
4887     ENTER_GL();
4888
4889     /* Bind the target texture */
4890     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4891     if (surface_is_offscreen(src_surface))
4892     {
4893         TRACE("Reading from an offscreen target\n");
4894         upsidedown = !upsidedown;
4895         glReadBuffer(device->offscreenBuffer);
4896     }
4897     else
4898     {
4899         glReadBuffer(surface_get_gl_buffer(src_surface));
4900     }
4901     checkGLcall("glReadBuffer");
4902
4903     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4904     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4905
4906     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4907     {
4908         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4909
4910         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4911             ERR("Texture filtering not supported in direct blit.\n");
4912     }
4913     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4914             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4915     {
4916         ERR("Texture filtering not supported in direct blit\n");
4917     }
4918
4919     if (upsidedown
4920             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4921             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4922     {
4923         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4924
4925         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4926                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4927                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4928                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4929     }
4930     else
4931     {
4932         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4933         /* I have to process this row by row to swap the image,
4934          * otherwise it would be upside down, so stretching in y direction
4935          * doesn't cost extra time
4936          *
4937          * However, stretching in x direction can be avoided if not necessary
4938          */
4939         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4940             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4941             {
4942                 /* Well, that stuff works, but it's very slow.
4943                  * find a better way instead
4944                  */
4945                 UINT col;
4946
4947                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4948                 {
4949                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4950                             dst_rect.left + col /* x offset */, row /* y offset */,
4951                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4952                 }
4953             }
4954             else
4955             {
4956                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4957                         dst_rect.left /* x offset */, row /* y offset */,
4958                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4959             }
4960         }
4961     }
4962     checkGLcall("glCopyTexSubImage2D");
4963
4964     LEAVE_GL();
4965     context_release(context);
4966
4967     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4968      * path is never entered
4969      */
4970     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4971 }
4972
4973 /* Uses the hardware to stretch and flip the image */
4974 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4975         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4976 {
4977     struct wined3d_device *device = dst_surface->resource.device;
4978     struct wined3d_swapchain *src_swapchain = NULL;
4979     GLuint src, backup = 0;
4980     float left, right, top, bottom; /* Texture coordinates */
4981     UINT fbwidth = src_surface->resource.width;
4982     UINT fbheight = src_surface->resource.height;
4983     struct wined3d_context *context;
4984     GLenum drawBuffer = GL_BACK;
4985     GLenum texture_target;
4986     BOOL noBackBufferBackup;
4987     BOOL src_offscreen;
4988     BOOL upsidedown = FALSE;
4989     RECT dst_rect = *dst_rect_in;
4990
4991     TRACE("Using hwstretch blit\n");
4992     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4993     context = context_acquire(device, src_surface);
4994     context_apply_blit_state(context, device);
4995     surface_internal_preload(dst_surface, SRGB_RGB);
4996
4997     src_offscreen = surface_is_offscreen(src_surface);
4998     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4999     if (!noBackBufferBackup && !src_surface->texture_name)
5000     {
5001         /* Get it a description */
5002         surface_internal_preload(src_surface, SRGB_RGB);
5003     }
5004     ENTER_GL();
5005
5006     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
5007      * This way we don't have to wait for the 2nd readback to finish to leave this function.
5008      */
5009     if (context->aux_buffers >= 2)
5010     {
5011         /* Got more than one aux buffer? Use the 2nd aux buffer */
5012         drawBuffer = GL_AUX1;
5013     }
5014     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
5015     {
5016         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5017         drawBuffer = GL_AUX0;
5018     }
5019
5020     if(noBackBufferBackup) {
5021         glGenTextures(1, &backup);
5022         checkGLcall("glGenTextures");
5023         context_bind_texture(context, GL_TEXTURE_2D, backup);
5024         texture_target = GL_TEXTURE_2D;
5025     } else {
5026         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5027          * we are reading from the back buffer, the backup can be used as source texture
5028          */
5029         texture_target = src_surface->texture_target;
5030         context_bind_texture(context, texture_target, src_surface->texture_name);
5031         glEnable(texture_target);
5032         checkGLcall("glEnable(texture_target)");
5033
5034         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5035         src_surface->flags &= ~SFLAG_INTEXTURE;
5036     }
5037
5038     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5039      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5040      */
5041     if(dst_rect.top > dst_rect.bottom) {
5042         UINT tmp = dst_rect.bottom;
5043         dst_rect.bottom = dst_rect.top;
5044         dst_rect.top = tmp;
5045         upsidedown = TRUE;
5046     }
5047
5048     if (src_offscreen)
5049     {
5050         TRACE("Reading from an offscreen target\n");
5051         upsidedown = !upsidedown;
5052         glReadBuffer(device->offscreenBuffer);
5053     }
5054     else
5055     {
5056         glReadBuffer(surface_get_gl_buffer(src_surface));
5057     }
5058
5059     /* TODO: Only back up the part that will be overwritten */
5060     glCopyTexSubImage2D(texture_target, 0,
5061                         0, 0 /* read offsets */,
5062                         0, 0,
5063                         fbwidth,
5064                         fbheight);
5065
5066     checkGLcall("glCopyTexSubImage2D");
5067
5068     /* No issue with overriding these - the sampler is dirty due to blit usage */
5069     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5070             wined3d_gl_mag_filter(magLookup, filter));
5071     checkGLcall("glTexParameteri");
5072     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5073             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5074     checkGLcall("glTexParameteri");
5075
5076     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5077         src_swapchain = src_surface->container.u.swapchain;
5078     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5079     {
5080         src = backup ? backup : src_surface->texture_name;
5081     }
5082     else
5083     {
5084         glReadBuffer(GL_FRONT);
5085         checkGLcall("glReadBuffer(GL_FRONT)");
5086
5087         glGenTextures(1, &src);
5088         checkGLcall("glGenTextures(1, &src)");
5089         context_bind_texture(context, GL_TEXTURE_2D, src);
5090
5091         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5092          * out for power of 2 sizes
5093          */
5094         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5095                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5096         checkGLcall("glTexImage2D");
5097         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5098                             0, 0 /* read offsets */,
5099                             0, 0,
5100                             fbwidth,
5101                             fbheight);
5102
5103         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5104         checkGLcall("glTexParameteri");
5105         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5106         checkGLcall("glTexParameteri");
5107
5108         glReadBuffer(GL_BACK);
5109         checkGLcall("glReadBuffer(GL_BACK)");
5110
5111         if(texture_target != GL_TEXTURE_2D) {
5112             glDisable(texture_target);
5113             glEnable(GL_TEXTURE_2D);
5114             texture_target = GL_TEXTURE_2D;
5115         }
5116     }
5117     checkGLcall("glEnd and previous");
5118
5119     left = src_rect->left;
5120     right = src_rect->right;
5121
5122     if (!upsidedown)
5123     {
5124         top = src_surface->resource.height - src_rect->top;
5125         bottom = src_surface->resource.height - src_rect->bottom;
5126     }
5127     else
5128     {
5129         top = src_surface->resource.height - src_rect->bottom;
5130         bottom = src_surface->resource.height - src_rect->top;
5131     }
5132
5133     if (src_surface->flags & SFLAG_NORMCOORD)
5134     {
5135         left /= src_surface->pow2Width;
5136         right /= src_surface->pow2Width;
5137         top /= src_surface->pow2Height;
5138         bottom /= src_surface->pow2Height;
5139     }
5140
5141     /* draw the source texture stretched and upside down. The correct surface is bound already */
5142     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5143     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5144
5145     context_set_draw_buffer(context, drawBuffer);
5146     glReadBuffer(drawBuffer);
5147
5148     glBegin(GL_QUADS);
5149         /* bottom left */
5150         glTexCoord2f(left, bottom);
5151         glVertex2i(0, 0);
5152
5153         /* top left */
5154         glTexCoord2f(left, top);
5155         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5156
5157         /* top right */
5158         glTexCoord2f(right, top);
5159         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5160
5161         /* bottom right */
5162         glTexCoord2f(right, bottom);
5163         glVertex2i(dst_rect.right - dst_rect.left, 0);
5164     glEnd();
5165     checkGLcall("glEnd and previous");
5166
5167     if (texture_target != dst_surface->texture_target)
5168     {
5169         glDisable(texture_target);
5170         glEnable(dst_surface->texture_target);
5171         texture_target = dst_surface->texture_target;
5172     }
5173
5174     /* Now read the stretched and upside down image into the destination texture */
5175     context_bind_texture(context, texture_target, dst_surface->texture_name);
5176     glCopyTexSubImage2D(texture_target,
5177                         0,
5178                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5179                         0, 0, /* We blitted the image to the origin */
5180                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5181     checkGLcall("glCopyTexSubImage2D");
5182
5183     if(drawBuffer == GL_BACK) {
5184         /* Write the back buffer backup back */
5185         if(backup) {
5186             if(texture_target != GL_TEXTURE_2D) {
5187                 glDisable(texture_target);
5188                 glEnable(GL_TEXTURE_2D);
5189                 texture_target = GL_TEXTURE_2D;
5190             }
5191             context_bind_texture(context, GL_TEXTURE_2D, backup);
5192         }
5193         else
5194         {
5195             if (texture_target != src_surface->texture_target)
5196             {
5197                 glDisable(texture_target);
5198                 glEnable(src_surface->texture_target);
5199                 texture_target = src_surface->texture_target;
5200             }
5201             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5202         }
5203
5204         glBegin(GL_QUADS);
5205             /* top left */
5206             glTexCoord2f(0.0f, 0.0f);
5207             glVertex2i(0, fbheight);
5208
5209             /* bottom left */
5210             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5211             glVertex2i(0, 0);
5212
5213             /* bottom right */
5214             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5215                     (float)fbheight / (float)src_surface->pow2Height);
5216             glVertex2i(fbwidth, 0);
5217
5218             /* top right */
5219             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5220             glVertex2i(fbwidth, fbheight);
5221         glEnd();
5222     }
5223     glDisable(texture_target);
5224     checkGLcall("glDisable(texture_target)");
5225
5226     /* Cleanup */
5227     if (src != src_surface->texture_name && src != backup)
5228     {
5229         glDeleteTextures(1, &src);
5230         checkGLcall("glDeleteTextures(1, &src)");
5231     }
5232     if(backup) {
5233         glDeleteTextures(1, &backup);
5234         checkGLcall("glDeleteTextures(1, &backup)");
5235     }
5236
5237     LEAVE_GL();
5238
5239     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5240
5241     context_release(context);
5242
5243     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5244      * path is never entered
5245      */
5246     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5247 }
5248
5249 /* Front buffer coordinates are always full screen coordinates, but our GL
5250  * drawable is limited to the window's client area. The sysmem and texture
5251  * copies do have the full screen size. Note that GL has a bottom-left
5252  * origin, while D3D has a top-left origin. */
5253 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5254 {
5255     UINT drawable_height;
5256
5257     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5258             && surface == surface->container.u.swapchain->front_buffer)
5259     {
5260         POINT offset = {0, 0};
5261         RECT windowsize;
5262
5263         ScreenToClient(window, &offset);
5264         OffsetRect(rect, offset.x, offset.y);
5265
5266         GetClientRect(window, &windowsize);
5267         drawable_height = windowsize.bottom - windowsize.top;
5268     }
5269     else
5270     {
5271         drawable_height = surface->resource.height;
5272     }
5273
5274     rect->top = drawable_height - rect->top;
5275     rect->bottom = drawable_height - rect->bottom;
5276 }
5277
5278 static void surface_blt_to_drawable(const struct wined3d_device *device,
5279         enum wined3d_texture_filter_type filter, BOOL color_key,
5280         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5281         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5282 {
5283     struct wined3d_context *context;
5284     RECT src_rect, dst_rect;
5285
5286     src_rect = *src_rect_in;
5287     dst_rect = *dst_rect_in;
5288
5289     /* Make sure the surface is up-to-date. This should probably use
5290      * surface_load_location() and worry about the destination surface too,
5291      * unless we're overwriting it completely. */
5292     surface_internal_preload(src_surface, SRGB_RGB);
5293
5294     /* Activate the destination context, set it up for blitting */
5295     context = context_acquire(device, dst_surface);
5296     context_apply_blit_state(context, device);
5297
5298     if (!surface_is_offscreen(dst_surface))
5299         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5300
5301     device->blitter->set_shader(device->blit_priv, context, src_surface);
5302
5303     ENTER_GL();
5304
5305     if (color_key)
5306     {
5307         glEnable(GL_ALPHA_TEST);
5308         checkGLcall("glEnable(GL_ALPHA_TEST)");
5309
5310         /* When the primary render target uses P8, the alpha component
5311          * contains the palette index. Which means that the colorkey is one of
5312          * the palette entries. In other cases pixels that should be masked
5313          * away have alpha set to 0. */
5314         if (primary_render_target_is_p8(device))
5315             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5316         else
5317             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5318         checkGLcall("glAlphaFunc");
5319     }
5320     else
5321     {
5322         glDisable(GL_ALPHA_TEST);
5323         checkGLcall("glDisable(GL_ALPHA_TEST)");
5324     }
5325
5326     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5327
5328     if (color_key)
5329     {
5330         glDisable(GL_ALPHA_TEST);
5331         checkGLcall("glDisable(GL_ALPHA_TEST)");
5332     }
5333
5334     LEAVE_GL();
5335
5336     /* Leave the opengl state valid for blitting */
5337     device->blitter->unset_shader(context->gl_info);
5338
5339     if (wined3d_settings.strict_draw_ordering
5340             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5341             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5342         wglFlush(); /* Flush to ensure ordering across contexts. */
5343
5344     context_release(context);
5345 }
5346
5347 /* Do not call while under the GL lock. */
5348 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5349 {
5350     struct wined3d_device *device = s->resource.device;
5351     const struct blit_shader *blitter;
5352
5353     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5354             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5355     if (!blitter)
5356     {
5357         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5358         return WINED3DERR_INVALIDCALL;
5359     }
5360
5361     return blitter->color_fill(device, s, rect, color);
5362 }
5363
5364 /* Do not call while under the GL lock. */
5365 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5366         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5367         enum wined3d_texture_filter_type filter)
5368 {
5369     struct wined3d_device *device = dst_surface->resource.device;
5370     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5371     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5372
5373     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5374             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5375             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5376
5377     /* Get the swapchain. One of the surfaces has to be a primary surface */
5378     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5379     {
5380         WARN("Destination is in sysmem, rejecting gl blt\n");
5381         return WINED3DERR_INVALIDCALL;
5382     }
5383
5384     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5385         dstSwapchain = dst_surface->container.u.swapchain;
5386
5387     if (src_surface)
5388     {
5389         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5390         {
5391             WARN("Src is in sysmem, rejecting gl blt\n");
5392             return WINED3DERR_INVALIDCALL;
5393         }
5394
5395         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5396             srcSwapchain = src_surface->container.u.swapchain;
5397     }
5398
5399     /* Early sort out of cases where no render target is used */
5400     if (!dstSwapchain && !srcSwapchain
5401             && src_surface != device->fb.render_targets[0]
5402             && dst_surface != device->fb.render_targets[0])
5403     {
5404         TRACE("No surface is render target, not using hardware blit.\n");
5405         return WINED3DERR_INVALIDCALL;
5406     }
5407
5408     /* No destination color keying supported */
5409     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5410     {
5411         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5412         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5413         return WINED3DERR_INVALIDCALL;
5414     }
5415
5416     if (dstSwapchain && dstSwapchain == srcSwapchain)
5417     {
5418         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5419         return WINED3DERR_INVALIDCALL;
5420     }
5421
5422     if (dstSwapchain && srcSwapchain)
5423     {
5424         FIXME("Implement hardware blit between two different swapchains\n");
5425         return WINED3DERR_INVALIDCALL;
5426     }
5427
5428     if (dstSwapchain)
5429     {
5430         /* Handled with regular texture -> swapchain blit */
5431         if (src_surface == device->fb.render_targets[0])
5432             TRACE("Blit from active render target to a swapchain\n");
5433     }
5434     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5435     {
5436         FIXME("Implement blit from a swapchain to the active render target\n");
5437         return WINED3DERR_INVALIDCALL;
5438     }
5439
5440     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5441     {
5442         /* Blit from render target to texture */
5443         BOOL stretchx;
5444
5445         /* P8 read back is not implemented */
5446         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5447                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5448         {
5449             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5450             return WINED3DERR_INVALIDCALL;
5451         }
5452
5453         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5454         {
5455             TRACE("Color keying not supported by frame buffer to texture blit\n");
5456             return WINED3DERR_INVALIDCALL;
5457             /* Destination color key is checked above */
5458         }
5459
5460         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5461             stretchx = TRUE;
5462         else
5463             stretchx = FALSE;
5464
5465         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5466          * flip the image nor scale it.
5467          *
5468          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5469          * -> If the app wants a image width an unscaled width, copy it line per line
5470          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5471          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5472          *    back buffer. This is slower than reading line per line, thus not used for flipping
5473          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5474          *    pixel by pixel. */
5475         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5476                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5477         {
5478             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5479             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5480         }
5481         else
5482         {
5483             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5484             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5485         }
5486
5487         if (!dst_surface->resource.map_count && !(dst_surface->flags & SFLAG_DONOTFREE))
5488         {
5489             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5490             dst_surface->resource.allocatedMemory = NULL;
5491             dst_surface->resource.heapMemory = NULL;
5492         }
5493         else
5494         {
5495             dst_surface->flags &= ~SFLAG_INSYSMEM;
5496         }
5497
5498         return WINED3D_OK;
5499     }
5500     else if (src_surface)
5501     {
5502         /* Blit from offscreen surface to render target */
5503         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5504         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5505
5506         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5507
5508         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5509                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5510                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5511         {
5512             FIXME("Unsupported blit operation falling back to software\n");
5513             return WINED3DERR_INVALIDCALL;
5514         }
5515
5516         /* Color keying: Check if we have to do a color keyed blt,
5517          * and if not check if a color key is activated.
5518          *
5519          * Just modify the color keying parameters in the surface and restore them afterwards
5520          * The surface keeps track of the color key last used to load the opengl surface.
5521          * PreLoad will catch the change to the flags and color key and reload if necessary.
5522          */
5523         if (flags & WINEDDBLT_KEYSRC)
5524         {
5525             /* Use color key from surface */
5526         }
5527         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5528         {
5529             /* Use color key from DDBltFx */
5530             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5531             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5532         }
5533         else
5534         {
5535             /* Do not use color key */
5536             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5537         }
5538
5539         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5540                 src_surface, src_rect, dst_surface, dst_rect);
5541
5542         /* Restore the color key parameters */
5543         src_surface->CKeyFlags = oldCKeyFlags;
5544         src_surface->src_blt_color_key = old_blt_key;
5545
5546         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5547
5548         return WINED3D_OK;
5549     }
5550
5551     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5552     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5553     return WINED3DERR_INVALIDCALL;
5554 }
5555
/* Draw a viewport-filling quad textured with `texture` while only depth
 * writes are enabled: color writes are masked off, the depth test is set to
 * GL_ALWAYS and the shader backend's depth blit program is selected for the
 * draw. The viewport is set to (x, y, w, h); `target` is handed to
 * surface_get_blt_info() to obtain the bind target and texture coordinates.
 * The framebuffer that is drawn into is whatever the caller has set up.
 * The enable / depth / color / viewport state changed here is saved with
 * glPushAttrib() and restored before returning, as are the previous texture
 * binding and the texture's compare mode. */
5556 /* GL locking is done by the caller */
5557 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5558         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5559 {
5560     struct wined3d_device *device = surface->resource.device;
5561     const struct wined3d_gl_info *gl_info = context->gl_info;
5562     GLint compare_mode = GL_NONE;
5563     struct blt_info info;
5564     GLint old_binding = 0;
5565     RECT rect;
5566
         /* Save every state group touched below; undone by glPopAttrib(). */
5567     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5568
         /* Depth-only pass: every fragment passes the depth test and writes
          * depth, nothing is written to the color buffer. */
5569     glDisable(GL_CULL_FACE);
5570     glDisable(GL_BLEND);
5571     glDisable(GL_ALPHA_TEST);
5572     glDisable(GL_SCISSOR_TEST);
5573     glDisable(GL_STENCIL_TEST);
5574     glEnable(GL_DEPTH_TEST);
5575     glDepthFunc(GL_ALWAYS);
5576     glDepthMask(GL_TRUE);
5577     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5578     glViewport(x, y, w, h);
5579     glDepthRange(0.0, 1.0);
5580
         /* SetRect() takes (left, top, right, bottom): top = h and bottom = 0,
          * i.e. the source rectangle is passed with top and bottom swapped. */
5581     SetRect(&rect, 0, h, w, 0);
5582     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5583     context_active_texture(context, context->gl_info, 0);
         /* Remember the current binding so it can be put back after the draw. */
5584     glGetIntegerv(info.binding, &old_binding);
5585     glBindTexture(info.bind_target, texture);
5586     if (gl_info->supported[ARB_SHADOW])
5587     {
             /* Switch the texture's compare mode to GL_NONE for the draw if it
              * is currently something else; the old mode is restored below. */
5588         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5589         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5590     }
5591
5592     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5593             gl_info, info.tex_type, &surface->ds_current_size);
5594
         /* Four vertices at the corners of clip space cover the whole viewport. */
5595     glBegin(GL_TRIANGLE_STRIP);
5596     glTexCoord3fv(info.coords[0]);
5597     glVertex2f(-1.0f, -1.0f);
5598     glTexCoord3fv(info.coords[1]);
5599     glVertex2f(1.0f, -1.0f);
5600     glTexCoord3fv(info.coords[2]);
5601     glVertex2f(-1.0f, 1.0f);
5602     glTexCoord3fv(info.coords[3]);
5603     glVertex2f(1.0f, 1.0f);
5604     glEnd();
5605
         /* compare_mode stays GL_NONE when ARB_shadow is unsupported, so this
          * only fires if the mode was actually changed above. */
5606     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5607     glBindTexture(info.bind_target, old_binding);
5608
5609     glPopAttrib();
5610
5611     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5612 }
5613
/* Replace the location flags of a depth/stencil surface with `location` and
 * record w x h in surface->ds_current_size.
 *
 * All SFLAG_LOCATIONS bits and SFLAG_DISCARDED are cleared from
 * surface->flags before `location` is OR'ed in, so `location` may itself
 * carry SFLAG_DISCARDED. Bits outside SFLAG_LOCATIONS | SFLAG_DISCARDED only
 * produce a FIXME; they are still stored. If the SFLAG_INTEXTURE state
 * changes and the surface is contained in a texture, the texture is marked
 * dirty. */
5614 void surface_modify_ds_location(struct wined3d_surface *surface,
5615         DWORD location, UINT w, UINT h)
5616 {
5617     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5618
5619     if (location & ~(SFLAG_LOCATIONS | SFLAG_DISCARDED))
5620         FIXME("Invalid location (%#x) specified.\n", location);
5621
         /* True when SFLAG_INTEXTURE is set in exactly one of the current
          * flags and the new location, i.e. the texture copy either gains or
          * loses validity. */
5622     if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5623             || (!(surface->flags & SFLAG_INTEXTURE) && (location & SFLAG_INTEXTURE)))
5624     {
5625         if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5626         {
5627             TRACE("Passing to container.\n");
5628             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5629         }
5630     }
5631
5632     surface->ds_current_size.cx = w;
5633     surface->ds_current_size.cy = h;
5634     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_DISCARDED);
5635     surface->flags |= location;
5636 }
5637
/* Make `location` an up-to-date location of a depth/stencil surface.
 *
 * Only implemented for ORM_FBO; in any other offscreen rendering mode the
 * function returns without doing anything. Data is actually copied for
 * SFLAG_INTEXTURE (onscreen depth buffer -> surface texture) and
 * SFLAG_INDRAWABLE (surface texture -> onscreen depth buffer). When the
 * surface is flagged SFLAG_DISCARDED no data is copied and
 * SFLAG_INRB_MULTISAMPLE is accepted as well.
 *
 * Unless one of the early returns (non-FBO mode, already up to date, fixed up
 * renderbuffer) is taken, `location` is set in surface->flags and
 * ds_current_size is set to the full surface size on return - this includes
 * the paths that only print a FIXME / ERR. */
5638 /* Context activation is done by the caller. */
5639 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5640 {
5641     struct wined3d_device *device = surface->resource.device;
5642     GLsizei w, h;
5643
5644     TRACE("surface %p, new location %#x.\n", surface, location);
5645
5646     /* TODO: Make this work for modes other than FBO */
5647     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5648
         /* `location` not flagged yet: use the recorded ds_current_size as the
          * copy size and zero it. Zeroing also keeps the "already up to date"
          * check below from matching (NOTE(review): assuming non-zero surface
          * dimensions). Otherwise the copy size defaults to the full surface. */
5649     if (!(surface->flags & location))
5650     {
5651         w = surface->ds_current_size.cx;
5652         h = surface->ds_current_size.cy;
5653         surface->ds_current_size.cx = 0;
5654         surface->ds_current_size.cy = 0;
5655     }
5656     else
5657     {
5658         w = surface->resource.width;
5659         h = surface->resource.height;
5660     }
5661
5662     if (surface->ds_current_size.cx == surface->resource.width
5663             && surface->ds_current_size.cy == surface->resource.height)
5664     {
5665         TRACE("Location (%#x) is already up to date.\n", location);
5666         return;
5667     }
5668
5669     if (surface->current_renderbuffer)
5670     {
5671         FIXME("Not supported with fixed up depth stencil.\n");
5672         return;
5673     }
5674
         /* Discarded contents do not have to be preserved: only make sure the
          * destination storage exists, then flag the location as current. */
5675     if (surface->flags & SFLAG_DISCARDED)
5676     {
5677         TRACE("Surface was discarded, no need copy data.\n");
5678         switch (location)
5679         {
5680             case SFLAG_INTEXTURE:
5681                 surface_prepare_texture(surface, context, FALSE);
5682                 break;
5683             case SFLAG_INRB_MULTISAMPLE:
5684                 surface_prepare_rb(surface, context->gl_info, TRUE);
5685                 break;
5686             case SFLAG_INDRAWABLE:
5687                 /* Nothing to do */
5688                 break;
5689             default:
5690                 FIXME("Unhandled location %#x\n", location);
5691         }
5692         surface->flags &= ~SFLAG_DISCARDED;
5693         surface->flags |= location;
5694         surface->ds_current_size.cx = surface->resource.width;
5695         surface->ds_current_size.cy = surface->resource.height;
5696         return;
5697     }
5698
         /* Nothing to copy from; the location is flagged as current anyway. */
5699     if (!(surface->flags & SFLAG_LOCATIONS))
5700     {
5701         FIXME("No up to date depth stencil location.\n");
5702         surface->flags |= location;
5703         surface->ds_current_size.cx = surface->resource.width;
5704         surface->ds_current_size.cy = surface->resource.height;
5705         return;
5706     }
5707
5708     if (location == SFLAG_INTEXTURE)
5709     {
5710         GLint old_binding = 0;
5711         GLenum bind_target;
5712
5713         /* The render target is allowed to be smaller than the depth/stencil
5714          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5715          * than the offscreen surface. Don't overwrite the offscreen surface
5716          * with undefined data. */
5717         w = min(w, context->swapchain->desc.backbuffer_width);
5718         h = min(h, context->swapchain->desc.backbuffer_height);
5719
5720         TRACE("Copying onscreen depth buffer to depth texture.\n");
5721
5722         ENTER_GL();
5723
             /* The intermediate texture is created on first use and kept in
              * the device. */
5724         if (!device->depth_blt_texture)
5725         {
5726             glGenTextures(1, &device->depth_blt_texture);
5727         }
5728
5729         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5730          * directly on the FBO texture. That's because we need to flip. */
             /* Step 1: copy the depth buffer into the intermediate texture.
              * NOTE(review): this call presumably binds the onscreen
              * framebuffer of the swapchain - confirm against
              * context_apply_fbo_state_blit(). */
5731         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5732                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5733         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5734         {
5735             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5736             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5737         }
5738         else
5739         {
5740             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5741             bind_target = GL_TEXTURE_2D;
5742         }
5743         glBindTexture(bind_target, device->depth_blt_texture);
5744         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5745          * internal format, because the internal format might include stencil
5746          * data. In principle we should copy stencil data as well, but unless
5747          * the driver supports stencil export it's hard to do, and doesn't
5748          * seem to be needed in practice. If the hardware doesn't support
5749          * writing stencil data, the glCopyTexImage2D() call might trigger
5750          * software fallbacks. */
5751         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5752         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5753         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5754         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5755         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5756         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5757         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5758         glBindTexture(bind_target, old_binding);
5759
             /* Step 2: set up a framebuffer with the surface's texture location
              * as depth attachment and draw the intermediate texture into it.
              * Draw and read buffers are set to GL_NONE for this pass. */
5760         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5761                 NULL, surface, SFLAG_INTEXTURE);
5762         context_set_draw_buffer(context, GL_NONE);
5763         glReadBuffer(GL_NONE);
5764
5765         /* Do the actual blit */
5766         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5767         checkGLcall("depth_blt");
5768
             /* The framebuffer binding was changed behind the state tracker's
              * back; mark it dirty. */
5769         context_invalidate_state(context, STATE_FRAMEBUFFER);
5770
5771         LEAVE_GL();
5772
5773         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5774     }
5775     else if (location == SFLAG_INDRAWABLE)
5776     {
5777         TRACE("Copying depth texture to onscreen depth buffer.\n");
5778
5779         ENTER_GL();
5780
5781         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5782                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
             /* The viewport's y origin is pow2Height - h rather than 0. */
5783         surface_depth_blt(surface, context, surface->texture_name,
5784                 0, surface->pow2Height - h, w, h, surface->texture_target);
5785         checkGLcall("depth_blt");
5786
5787         context_invalidate_state(context, STATE_FRAMEBUFFER);
5788
5789         LEAVE_GL();
5790
5791         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5792     }
5793     else
5794     {
5795         ERR("Invalid location (%#x) specified.\n", location);
5796     }
5797
         /* Reached by all three branches above, including the ERR() one. */
5798     surface->flags |= location;
5799     surface->ds_current_size.cx = surface->resource.width;
5800     surface->ds_current_size.cy = surface->resource.height;
5801 }
5802
/* Update the set of up-to-date locations of a surface.
 *
 * persistent == TRUE:  `location` replaces all SFLAG_LOCATIONS bits, i.e. it
 *                      becomes the only valid location. Emulated overlays are
 *                      redrawn if SFLAG_INDRAWABLE is part of it.
 * persistent == FALSE: the bits in `location` are removed from the valid
 *                      locations.
 *
 * With EXT_texture_sRGB_decode, SFLAG_INTEXTURE and SFLAG_INSRGBTEX are always
 * handled as a pair. The containing texture (if any) is marked dirty when a
 * texture location loses validity. An ERR is printed if the surface ends up
 * without any valid location. */
5803 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5804 {
5805     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5806     struct wined3d_surface *overlay;
5807
5808     TRACE("surface %p, location %s, persistent %#x.\n",
5809             surface, debug_surflocation(location), persistent);
5810
         /* Diagnostic only; processing continues after the ERR. */
5811     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5812             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5813             && (location & SFLAG_INDRAWABLE))
5814         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5815
         /* Widen either texture location to both when sRGB decode is
          * available. */
5816     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5817             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5818         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5819
5820     if (persistent)
5821     {
             /* A texture location that is valid now but not part of the new
              * set is about to be invalidated. */
5822         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5823                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5824         {
5825             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5826             {
5827                 TRACE("Passing to container.\n");
5828                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5829             }
5830         }
5831         surface->flags &= ~SFLAG_LOCATIONS;
5832         surface->flags |= location;
5833
5834         /* Redraw emulated overlays, if any */
5835         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5836         {
5837             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5838             {
5839                 surface_draw_overlay(overlay);
5840             }
5841         }
5842     }
5843     else
5844     {
             /* Some texture location is currently valid and some texture
              * location is being removed (not necessarily the same bit). */
5845         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5846         {
5847             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5848             {
5849                 TRACE("Passing to container\n");
5850                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5851             }
5852         }
5853         surface->flags &= ~location;
5854     }
5855
5856     if (!(surface->flags & SFLAG_LOCATIONS))
5857     {
5858         ERR("Surface %p does not have any up to date location.\n", surface);
5859     }
5860 }
5861
/* Map a single surface location flag to the resource access flag needed to
 * use it: WINED3D_RESOURCE_ACCESS_CPU for system memory,
 * WINED3D_RESOURCE_ACCESS_GPU for every other known location.
 *
 * The switch matches exact values, so a combination of several location bits
 * (or an unknown value) ends up in the default case, which prints a FIXME and
 * returns 0. */
5862 static DWORD resource_access_from_location(DWORD location)
5863 {
5864     switch (location)
5865     {
5866         case SFLAG_INSYSMEM:
5867             return WINED3D_RESOURCE_ACCESS_CPU;
5868
5869         case SFLAG_INDRAWABLE:
5870         case SFLAG_INSRGBTEX:
5871         case SFLAG_INTEXTURE:
5872         case SFLAG_INRB_MULTISAMPLE:
5873         case SFLAG_INRB_RESOLVED:
5874             return WINED3D_RESOURCE_ACCESS_GPU;
5875
5876         default:
5877             FIXME("Unhandled location %#x.\n", location);
5878             return 0;
5879     }
5880 }
5881
/* Bring the surface contents into system memory.
 *
 * Sources are tried in this order: a renderbuffer location is first loaded
 * into SFLAG_INTEXTURE; a texture location (RGB or sRGB) is downloaded with
 * surface_download_data(); otherwise SFLAG_INDRAWABLE is read back with
 * read_from_framebuffer(). `rect` is only passed on in the framebuffer
 * read-back path. If none of these locations is valid a FIXME is printed.
 *
 * This function does not touch surface->flags itself. */
5882 static void surface_load_sysmem(struct wined3d_surface *surface,
5883         const struct wined3d_gl_info *gl_info, const RECT *rect)
5884 {
5885     surface_prepare_system_memory(surface);
5886
5887     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5888         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5889
5890     /* Download the surface to system memory. */
5891     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5892     {
5893         struct wined3d_device *device = surface->resource.device;
5894         struct wined3d_context *context;
5895
5896         /* TODO: Use already acquired context when possible. */
5897         context = context_acquire(device, NULL);
5898
             /* The last argument is TRUE only when SFLAG_INTEXTURE is not
              * valid, i.e. when just the sRGB texture is (presumably it
              * selects the sRGB texture - confirm against
              * surface_bind_and_dirtify()). */
5899         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5900         surface_download_data(surface, gl_info);
5901
5902         context_release(context);
5903
5904         return;
5905     }
5906
5907     if (surface->flags & SFLAG_INDRAWABLE)
5908     {
5909         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5910                 wined3d_surface_get_pitch(surface));
5911         return;
5912     }
5913
5914     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5915             surface, surface->flags & SFLAG_LOCATIONS);
5916 }
5917
/* Bring the surface contents into the drawable (SFLAG_INDRAWABLE).
 *
 * Returns WINED3DERR_INVALIDCALL for offscreen surfaces in ORM_FBO mode,
 * E_OUTOFMEMORY if the conversion buffer cannot be allocated, and WINED3D_OK
 * otherwise.
 *
 * If the texture location is valid the surface is blitted from it with
 * surface_blt_to_drawable(). Otherwise the system memory copy is written with
 * flush_to_framebuffer_drawpixels(), after converting it with
 * d3dfmt_convert_surface() when d3dfmt_get_conv() requests a conversion.
 * SFLAG_CONVERTED is set or cleared accordingly. */
5918 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5919         const struct wined3d_gl_info *gl_info, const RECT *rect)
5920 {
5921     struct wined3d_device *device = surface->resource.device;
5922     enum wined3d_conversion_type convert;
5923     struct wined3d_format format;
5924     UINT byte_count;
5925     BYTE *mem;
5926
5927     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5928     {
5929         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5930         return WINED3DERR_INVALIDCALL;
5931     }
5932
         /* With RTL_READTEX the texture is loaded first, so that the blit path
          * below gets used. */
5933     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5934         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5935
5936     if (surface->flags & SFLAG_INTEXTURE)
5937     {
5938         RECT r;
5939
5940         surface_get_rect(surface, rect, &r);
             /* Source and destination are the same surface and rectangle. */
5941         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5942
5943         return WINED3D_OK;
5944     }
5945
5946     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5947     {
5948         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5949          * path through sysmem. */
5950         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5951     }
5952
5953     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5954
5955     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5956      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5957      * called. */
5958     if ((convert != WINED3D_CT_NONE) && (surface->flags & SFLAG_PBO))
5959     {
5960         struct wined3d_context *context;
5961
5962         TRACE("Removing the pbo attached to surface %p.\n", surface);
5963
5964         /* TODO: Use already acquired context when possible. */
5965         context = context_acquire(device, NULL);
5966
5967         surface_remove_pbo(surface, gl_info);
5968
5969         context_release(context);
5970     }
5971
5972     if ((convert != WINED3D_CT_NONE) && surface->resource.allocatedMemory)
5973     {
5974         UINT height = surface->resource.height;
5975         UINT width = surface->resource.width;
5976         UINT src_pitch, dst_pitch;
5977
5978         byte_count = format.conv_byte_count;
5979         src_pitch = wined3d_surface_get_pitch(surface);
5980
5981         /* Stick to the alignment for the converted surface too, makes it
5982          * easier to load the surface. */
             /* Rounds the pitch up to a multiple of surface_alignment; the
              * mask arithmetic assumes the alignment is a power of two. */
5983         dst_pitch = width * byte_count;
5984         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5985
5986         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5987         {
5988             ERR("Out of memory (%u).\n", dst_pitch * height);
5989             return E_OUTOFMEMORY;
5990         }
5991
5992         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5993                 src_pitch, width, height, dst_pitch, convert, surface);
5994
5995         surface->flags |= SFLAG_CONVERTED;
5996     }
5997     else
5998     {
             /* No conversion: hand the surface's own memory to the upload. */
5999         surface->flags &= ~SFLAG_CONVERTED;
6000         mem = surface->resource.allocatedMemory;
6001         byte_count = format.byte_count;
6002     }
6003
6004     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
6005
6006     /* Don't delete PBO memory. */
         /* mem differs from allocatedMemory only if the temporary conversion
          * buffer was allocated above. */
6007     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6008         HeapFree(GetProcessHeap(), 0, mem);
6009
6010     return WINED3D_OK;
6011 }
6012
/* Bring the surface contents into its GL texture: the sRGB texture when
 * `srgb` is TRUE, the regular texture otherwise.
 *
 * Returns WINED3D_OK on success and E_OUTOFMEMORY if a conversion buffer
 * cannot be allocated.
 *
 * Strategies, in the order they are tried:
 *  1. Non-FBO mode, offscreen surface with a valid drawable:
 *     surface_load_fb_texture().
 *  2. The other texture flavour is valid and an FBO blit is supported:
 *     surface_blt_fbo() between the two textures.
 *  3. A renderbuffer location is valid and an FBO blit is supported:
 *     surface_blt_fbo() from the renderbuffer.
 *  4. Upload from system memory, loading SFLAG_INSYSMEM first if needed and
 *     converting the data when the format or color keying requires it.
 *
 * `rect` is only forwarded to surface_load_location() when system memory has
 * to be loaded; the upload itself always covers the whole surface. */
6013 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6014         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6015 {
6016     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6017     struct wined3d_device *device = surface->resource.device;
6018     enum wined3d_conversion_type convert;
6019     struct wined3d_context *context;
6020     UINT width, src_pitch, dst_pitch;
6021     struct wined3d_bo_address data;
6022     struct wined3d_format format;
6023     POINT dst_point = {0, 0};
6024     BYTE *mem;
6025
6026     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6027             && surface_is_offscreen(surface)
6028             && (surface->flags & SFLAG_INDRAWABLE))
6029     {
6030         surface_load_fb_texture(surface, srgb);
6031
6032         return WINED3D_OK;
6033     }
6034
         /* RGB <-> sRGB texture copy on the GPU. The source is whichever
          * flavour was not requested. */
6035     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6036             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6037             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6038                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6039                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6040     {
6041         if (srgb)
6042             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6043                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6044         else
6045             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6046                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6047
6048         return WINED3D_OK;
6049     }
6050
         /* Renderbuffer -> texture copy on the GPU; the resolved renderbuffer
          * is preferred over the multisampled one as source. */
6051     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6052             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6053             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6054                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6055                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6056     {
6057         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6058         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
             /* NOTE(review): this local shadows the `rect` parameter and holds
              * the same values as src_rect. */
6059         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6060
6061         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6062                 &rect, surface, dst_location, &rect);
6063
6064         return WINED3D_OK;
6065     }
6066
6067     /* Upload from system memory */
6068
6069     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6070             TRUE /* We will use textures */, &format, &convert);
6071
         /* If only the other texture flavour is valid (and system memory is
          * not), download it to system memory first. */
6072     if (srgb)
6073     {
6074         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6075         {
6076             /* Performance warning... */
6077             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6078             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6079         }
6080     }
6081     else
6082     {
6083         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6084         {
6085             /* Performance warning... */
6086             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6087             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6088         }
6089     }
6090
6091     if (!(surface->flags & SFLAG_INSYSMEM))
6092     {
6093         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6094         /* Let's hope we get it from somewhere... */
6095         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6096     }
6097
6098     /* TODO: Use already acquired context when possible. */
6099     context = context_acquire(device, NULL);
6100
6101     surface_prepare_texture(surface, context, srgb);
6102     surface_bind_and_dirtify(surface, context, srgb);
6103
         /* Record which color key (if any) the GL texture is being loaded
          * with. */
6104     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6105     {
6106         surface->flags |= SFLAG_GLCKEY;
6107         surface->gl_color_key = surface->src_blt_color_key;
6108     }
6109     else surface->flags &= ~SFLAG_GLCKEY;
6110
6111     width = surface->resource.width;
6112     src_pitch = wined3d_surface_get_pitch(surface);
6113
6114     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6115      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6116      * called. */
6117     if ((convert != WINED3D_CT_NONE || format.convert) && (surface->flags & SFLAG_PBO))
6118     {
6119         TRACE("Removing the pbo attached to surface %p.\n", surface);
6120         surface_remove_pbo(surface, gl_info);
6121     }
6122
         /* Pick the upload source: a freshly converted temporary buffer
          * (format fixup or color key fixup) or the surface memory itself.
          * In the converted cases format.byte_count and src_pitch are updated
          * to describe the temporary buffer. */
6123     if (format.convert)
6124     {
6125         /* This code is entered for texture formats which need a fixup. */
6126         UINT height = surface->resource.height;
6127
6128         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6129         dst_pitch = width * format.conv_byte_count;
6130         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6131
6132         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6133         {
6134             ERR("Out of memory (%u).\n", dst_pitch * height);
6135             context_release(context);
6136             return E_OUTOFMEMORY;
6137         }
6138         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6139         format.byte_count = format.conv_byte_count;
6140         src_pitch = dst_pitch;
6141     }
6142     else if (convert != WINED3D_CT_NONE && surface->resource.allocatedMemory)
6143     {
6144         /* This code is only entered for color keying fixups */
6145         UINT height = surface->resource.height;
6146
6147         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6148         dst_pitch = width * format.conv_byte_count;
6149         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6150
6151         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6152         {
6153             ERR("Out of memory (%u).\n", dst_pitch * height);
6154             context_release(context);
6155             return E_OUTOFMEMORY;
6156         }
6157         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6158                 width, height, dst_pitch, convert, surface);
6159         format.byte_count = format.conv_byte_count;
6160         src_pitch = dst_pitch;
6161     }
6162     else
6163     {
6164         mem = surface->resource.allocatedMemory;
6165     }
6166
6167     data.buffer_object = surface->pbo;
6168     data.addr = mem;
6169     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6170
6171     context_release(context);
6172
6173     /* Don't delete PBO memory. */
         /* Only a temporary conversion buffer differs from allocatedMemory. */
6174     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6175         HeapFree(GetProcessHeap(), 0, mem);
6176
6177     return WINED3D_OK;
6178 }
6179
/* Copy the whole surface from its SFLAG_INRB_MULTISAMPLE location to its
 * SFLAG_INRB_RESOLVED location with surface_blt_fbo().
 *
 * If the multisample location is not current an ERR is printed, but the blit
 * is performed regardless. */
6180 static void surface_multisample_resolve(struct wined3d_surface *surface)
6181 {
6182     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6183
6184     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6185         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6186
6187     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6188             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6189 }
6190
/* Make `location` an up-to-date location of the surface, copying the data
 * from a currently valid location if necessary.
 *
 * surface:  the surface to load.
 * location: a single SFLAG_IN* location flag.
 * rect:     optional sub-rectangle, forwarded to the per-location loaders.
 *           When it is non-NULL the location is NOT flagged as valid
 *           afterwards.
 *
 * Returns WINED3D_OK on success, WINED3DERR_INVALIDCALL for depth/stencil
 * surfaces with a location other than SFLAG_INTEXTURE,
 * WINED3DERR_DEVICELOST (and sets SFLAG_LOST) if the surface has no valid
 * location at all, or the failure code of the drawable / texture loader.
 * An unknown location only prints an ERR and still returns WINED3D_OK. */
6191 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6192 {
6193     struct wined3d_device *device = surface->resource.device;
6194     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6195     HRESULT hr;
6196
6197     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6198
         /* Depth/stencil surfaces are handled by surface_load_ds_location(),
          * and only for the texture location. */
6199     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6200     {
6201         if (location == SFLAG_INTEXTURE)
6202         {
6203             struct wined3d_context *context = context_acquire(device, NULL);
6204             surface_load_ds_location(surface, context, location);
6205             context_release(context);
6206             return WINED3D_OK;
6207         }
6208         else
6209         {
6210             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6211             return WINED3DERR_INVALIDCALL;
6212         }
6213     }
6214
         /* With sRGB decode support a request for the sRGB texture is served
          * by the regular texture. */
6215     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6216         location = SFLAG_INTEXTURE;
6217
6218     if (surface->flags & location)
6219     {
6220         TRACE("Location already up to date.\n");
6221
             /* Even when system memory is current, a PBO may still have to be
              * set up for it. */
6222         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6223                 && surface_need_pbo(surface, gl_info))
6224             surface_load_pbo(surface, gl_info);
6225
6226         return WINED3D_OK;
6227     }
6228
         /* Diagnostic only, evaluated when WARN output is enabled for this
          * channel. */
6229     if (WARN_ON(d3d_surface))
6230     {
6231         DWORD required_access = resource_access_from_location(location);
6232         if ((surface->resource.access_flags & required_access) != required_access)
6233             WARN("Operation requires %#x access, but surface only has %#x.\n",
6234                     required_access, surface->resource.access_flags);
6235     }
6236
6237     if (!(surface->flags & SFLAG_LOCATIONS))
6238     {
6239         ERR("Surface %p does not have any up to date location.\n", surface);
6240         surface->flags |= SFLAG_LOST;
6241         return WINED3DERR_DEVICELOST;
6242     }
6243
6244     switch (location)
6245     {
6246         case SFLAG_INSYSMEM:
6247             surface_load_sysmem(surface, gl_info, rect);
6248             break;
6249
6250         case SFLAG_INDRAWABLE:
6251             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6252                 return hr;
6253             break;
6254
6255         case SFLAG_INRB_RESOLVED:
6256             surface_multisample_resolve(surface);
6257             break;
6258
6259         case SFLAG_INTEXTURE:
6260         case SFLAG_INSRGBTEX:
6261             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6262                 return hr;
6263             break;
6264
6265         default:
6266             ERR("Don't know how to handle location %#x.\n", location);
6267             break;
6268     }
6269
         /* Only a load without sub-rectangle marks the location as valid.
          * A system memory copy that is no longer the target location may
          * then be evicted. */
6270     if (!rect)
6271     {
6272         surface->flags |= location;
6273
6274         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6275             surface_evict_sysmem(surface);
6276     }
6277
         /* With sRGB decode support, one valid texture location implies the
          * other. */
6278     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6279             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6280     {
6281         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6282     }
6283
6284     return WINED3D_OK;
6285 }
6286
/* Tell whether a surface is offscreen.
 *
 * Returns TRUE for any surface that is not contained in a swapchain, FALSE
 * for a swapchain's front buffer, and the swapchain's render_to_fbo setting
 * for every other swapchain surface. */
6287 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6288 {
         /* Read from the container union before the type check below; the
          * value is only used once the container is known to be a swapchain. */
6289     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6290
6291     /* Not on a swapchain - must be offscreen */
6292     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6293
6294     /* The front buffer is always onscreen */
6295     if (surface == swapchain->front_buffer) return FALSE;
6296
6297     /* If the swapchain is rendered to an FBO, the backbuffer is
6298      * offscreen, otherwise onscreen */
6299     return swapchain->render_to_fbo;
6300 }
6301
6302 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Release hook of the fixed-function blitter: intentionally empty.
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6305
6306 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6307 /* Context activation is done by the caller. */
6308 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6309 {
6310     BYTE table[256][4];
6311     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6312
6313     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6314
6315     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6316     ENTER_GL();
6317     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6318     LEAVE_GL();
6319 }
6320
6321 /* Context activation is done by the caller. */
6322 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6323 {
6324     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6325
6326     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6327      * else the surface is converted in software at upload time in LoadLocation.
6328      */
6329     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6330             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6331         ffp_blit_p8_upload_palette(surface, context->gl_info);
6332
6333     ENTER_GL();
6334     glEnable(surface->texture_target);
6335     checkGLcall("glEnable(surface->texture_target)");
6336     LEAVE_GL();
6337     return WINED3D_OK;
6338 }
6339
6340 /* Context activation is done by the caller. */
6341 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6342 {
6343     ENTER_GL();
6344     glDisable(GL_TEXTURE_2D);
6345     checkGLcall("glDisable(GL_TEXTURE_2D)");
6346     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6347     {
6348         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6349         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6350     }
6351     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6352     {
6353         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6354         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6355     }
6356     LEAVE_GL();
6357 }
6358
6359 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6360         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6361         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6362 {
6363     enum complex_fixup src_fixup;
6364
6365     switch (blit_op)
6366     {
6367         case WINED3D_BLIT_OP_COLOR_BLIT:
6368             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6369                 return FALSE;
6370
6371             src_fixup = get_complex_fixup(src_format->color_fixup);
6372             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6373             {
6374                 TRACE("Checking support for fixup:\n");
6375                 dump_color_fixup_desc(src_format->color_fixup);
6376             }
6377
6378             if (!is_identity_fixup(dst_format->color_fixup))
6379             {
6380                 TRACE("Destination fixups are not supported\n");
6381                 return FALSE;
6382             }
6383
6384             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6385             {
6386                 TRACE("P8 fixup supported\n");
6387                 return TRUE;
6388             }
6389
6390             /* We only support identity conversions. */
6391             if (is_identity_fixup(src_format->color_fixup))
6392             {
6393                 TRACE("[OK]\n");
6394                 return TRUE;
6395             }
6396
6397             TRACE("[FAILED]\n");
6398             return FALSE;
6399
6400         case WINED3D_BLIT_OP_COLOR_FILL:
6401             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6402                 return FALSE;
6403
6404             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6405             {
6406                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6407                     return FALSE;
6408             }
6409             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6410             {
6411                 TRACE("Color fill not supported\n");
6412                 return FALSE;
6413             }
6414
6415             /* FIXME: We should reject color fills on formats with fixups,
6416              * but this would break P8 color fills for example. */
6417
6418             return TRUE;
6419
6420         case WINED3D_BLIT_OP_DEPTH_FILL:
6421             return TRUE;
6422
6423         default:
6424             TRACE("Unsupported blit_op=%d\n", blit_op);
6425             return FALSE;
6426     }
6427 }
6428
6429 /* Do not call while under the GL lock. */
6430 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6431         const RECT *dst_rect, const struct wined3d_color *color)
6432 {
6433     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6434     struct wined3d_fb_state fb = {&dst_surface, NULL};
6435
6436     device_clear_render_targets(device, 1, &fb, 1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6437
6438     return WINED3D_OK;
6439 }
6440
6441 /* Do not call while under the GL lock. */
6442 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6443         struct wined3d_surface *surface, const RECT *rect, float depth)
6444 {
6445     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6446     struct wined3d_fb_state fb = {NULL, surface};
6447
6448     device_clear_render_targets(device, 0, &fb, 1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6449
6450     return WINED3D_OK;
6451 }
6452
6453 const struct blit_shader ffp_blit =  {
6454     ffp_blit_alloc,
6455     ffp_blit_free,
6456     ffp_blit_set,
6457     ffp_blit_unset,
6458     ffp_blit_supported,
6459     ffp_blit_color_fill,
6460     ffp_blit_depth_fill,
6461 };
6462
6463 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6464 {
6465     return WINED3D_OK;
6466 }
6467
/* Release hook of the CPU blitter: intentionally empty.
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6472
6473 /* Context activation is done by the caller. */
6474 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6475 {
6476     return WINED3D_OK;
6477 }
6478
/* Teardown hook of the CPU blitter: intentionally empty.
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6483
6484 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6485         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6486         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6487 {
6488     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6489     {
6490         return TRUE;
6491     }
6492
6493     return FALSE;
6494 }
6495
6496 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6497         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6498         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6499 {
6500     UINT row_block_count;
6501     const BYTE *src_row;
6502     BYTE *dst_row;
6503     UINT x, y;
6504
6505     src_row = src_data;
6506     dst_row = dst_data;
6507
6508     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6509
6510     if (!flags)
6511     {
6512         for (y = 0; y < update_h; y += format->block_height)
6513         {
6514             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6515             src_row += src_pitch;
6516             dst_row += dst_pitch;
6517         }
6518
6519         return WINED3D_OK;
6520     }
6521
6522     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6523     {
6524         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6525
6526         switch (format->id)
6527         {
6528             case WINED3DFMT_DXT1:
6529                 for (y = 0; y < update_h; y += format->block_height)
6530                 {
6531                     struct block
6532                     {
6533                         WORD color[2];
6534                         BYTE control_row[4];
6535                     };
6536
6537                     const struct block *s = (const struct block *)src_row;
6538                     struct block *d = (struct block *)dst_row;
6539
6540                     for (x = 0; x < row_block_count; ++x)
6541                     {
6542                         d[x].color[0] = s[x].color[0];
6543                         d[x].color[1] = s[x].color[1];
6544                         d[x].control_row[0] = s[x].control_row[3];
6545                         d[x].control_row[1] = s[x].control_row[2];
6546                         d[x].control_row[2] = s[x].control_row[1];
6547                         d[x].control_row[3] = s[x].control_row[0];
6548                     }
6549                     src_row -= src_pitch;
6550                     dst_row += dst_pitch;
6551                 }
6552                 return WINED3D_OK;
6553
6554             case WINED3DFMT_DXT3:
6555                 for (y = 0; y < update_h; y += format->block_height)
6556                 {
6557                     struct block
6558                     {
6559                         WORD alpha_row[4];
6560                         WORD color[2];
6561                         BYTE control_row[4];
6562                     };
6563
6564                     const struct block *s = (const struct block *)src_row;
6565                     struct block *d = (struct block *)dst_row;
6566
6567                     for (x = 0; x < row_block_count; ++x)
6568                     {
6569                         d[x].alpha_row[0] = s[x].alpha_row[3];
6570                         d[x].alpha_row[1] = s[x].alpha_row[2];
6571                         d[x].alpha_row[2] = s[x].alpha_row[1];
6572                         d[x].alpha_row[3] = s[x].alpha_row[0];
6573                         d[x].color[0] = s[x].color[0];
6574                         d[x].color[1] = s[x].color[1];
6575                         d[x].control_row[0] = s[x].control_row[3];
6576                         d[x].control_row[1] = s[x].control_row[2];
6577                         d[x].control_row[2] = s[x].control_row[1];
6578                         d[x].control_row[3] = s[x].control_row[0];
6579                     }
6580                     src_row -= src_pitch;
6581                     dst_row += dst_pitch;
6582                 }
6583                 return WINED3D_OK;
6584
6585             default:
6586                 FIXME("Compressed flip not implemented for format %s.\n",
6587                         debug_d3dformat(format->id));
6588                 return E_NOTIMPL;
6589         }
6590     }
6591
6592     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6593             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6594
6595     return E_NOTIMPL;
6596 }
6597
6598 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6599         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6600         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6601 {
6602     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6603     const struct wined3d_format *src_format, *dst_format;
6604     struct wined3d_surface *orig_src = src_surface;
6605     struct wined3d_map_desc dst_map, src_map;
6606     const BYTE *sbase = NULL;
6607     HRESULT hr = WINED3D_OK;
6608     const BYTE *sbuf;
6609     BYTE *dbuf;
6610     int x, y;
6611
6612     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6613             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6614             flags, fx, debug_d3dtexturefiltertype(filter));
6615
6616     if (src_surface == dst_surface)
6617     {
6618         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6619         src_map = dst_map;
6620         src_format = dst_surface->resource.format;
6621         dst_format = src_format;
6622     }
6623     else
6624     {
6625         dst_format = dst_surface->resource.format;
6626         if (src_surface)
6627         {
6628             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6629             {
6630                 src_surface = surface_convert_format(src_surface, dst_format->id);
6631                 if (!src_surface)
6632                 {
6633                     /* The conv function writes a FIXME */
6634                     WARN("Cannot convert source surface format to dest format.\n");
6635                     goto release;
6636                 }
6637             }
6638             wined3d_surface_map(src_surface, &src_map, NULL, WINED3D_MAP_READONLY);
6639             src_format = src_surface->resource.format;
6640         }
6641         else
6642         {
6643             src_format = dst_format;
6644         }
6645
6646         wined3d_surface_map(dst_surface, &dst_map, dst_rect, 0);
6647     }
6648
6649     bpp = dst_surface->resource.format->byte_count;
6650     srcheight = src_rect->bottom - src_rect->top;
6651     srcwidth = src_rect->right - src_rect->left;
6652     dstheight = dst_rect->bottom - dst_rect->top;
6653     dstwidth = dst_rect->right - dst_rect->left;
6654     width = (dst_rect->right - dst_rect->left) * bpp;
6655
6656     if (src_surface)
6657         sbase = (BYTE *)src_map.data
6658                 + ((src_rect->top / src_format->block_height) * src_map.row_pitch)
6659                 + ((src_rect->left / src_format->block_width) * src_format->block_byte_count);
6660     if (src_surface != dst_surface)
6661         dbuf = dst_map.data;
6662     else
6663         dbuf = (BYTE *)dst_map.data
6664                 + ((dst_rect->top / dst_format->block_height) * dst_map.row_pitch)
6665                 + ((dst_rect->left / dst_format->block_width) * dst_format->block_byte_count);
6666
6667     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6668     {
6669         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6670
6671         if (src_surface == dst_surface)
6672         {
6673             FIXME("Only plain blits supported on compressed surfaces.\n");
6674             hr = E_NOTIMPL;
6675             goto release;
6676         }
6677
6678         if (srcheight != dstheight || srcwidth != dstwidth)
6679         {
6680             WARN("Stretching not supported on compressed surfaces.\n");
6681             hr = WINED3DERR_INVALIDCALL;
6682             goto release;
6683         }
6684
6685         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6686         {
6687             WARN("Rectangle not block-aligned.\n");
6688             hr = WINED3DERR_INVALIDCALL;
6689             goto release;
6690         }
6691
6692         hr = surface_cpu_blt_compressed(sbase, dbuf,
6693                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6694                 src_format, flags, fx);
6695         goto release;
6696     }
6697
6698     /* First, all the 'source-less' blits */
6699     if (flags & WINEDDBLT_COLORFILL)
6700     {
6701         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6702         flags &= ~WINEDDBLT_COLORFILL;
6703     }
6704
6705     if (flags & WINEDDBLT_DEPTHFILL)
6706     {
6707         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6708     }
6709     if (flags & WINEDDBLT_ROP)
6710     {
6711         /* Catch some degenerate cases here. */
6712         switch (fx->dwROP)
6713         {
6714             case BLACKNESS:
6715                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6716                 break;
6717             case 0xAA0029: /* No-op */
6718                 break;
6719             case WHITENESS:
6720                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6721                 break;
6722             case SRCCOPY: /* Well, we do that below? */
6723                 break;
6724             default:
6725                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6726                 goto error;
6727         }
6728         flags &= ~WINEDDBLT_ROP;
6729     }
6730     if (flags & WINEDDBLT_DDROPS)
6731     {
6732         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6733     }
6734     /* Now the 'with source' blits. */
6735     if (src_surface)
6736     {
6737         int sx, xinc, sy, yinc;
6738
6739         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6740             goto release;
6741
6742         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6743                 && (srcwidth != dstwidth || srcheight != dstheight))
6744         {
6745             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6746             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6747         }
6748
6749         xinc = (srcwidth << 16) / dstwidth;
6750         yinc = (srcheight << 16) / dstheight;
6751
6752         if (!flags)
6753         {
6754             /* No effects, we can cheat here. */
6755             if (dstwidth == srcwidth)
6756             {
6757                 if (dstheight == srcheight)
6758                 {
6759                     /* No stretching in either direction. This needs to be as
6760                      * fast as possible. */
6761                     sbuf = sbase;
6762
6763                     /* Check for overlapping surfaces. */
6764                     if (src_surface != dst_surface || dst_rect->top < src_rect->top
6765                             || dst_rect->right <= src_rect->left || src_rect->right <= dst_rect->left)
6766                     {
6767                         /* No overlap, or dst above src, so copy from top downwards. */
6768                         for (y = 0; y < dstheight; ++y)
6769                         {
6770                             memcpy(dbuf, sbuf, width);
6771                             sbuf += src_map.row_pitch;
6772                             dbuf += dst_map.row_pitch;
6773                         }
6774                     }
6775                     else if (dst_rect->top > src_rect->top)
6776                     {
6777                         /* Copy from bottom upwards. */
6778                         sbuf += src_map.row_pitch * dstheight;
6779                         dbuf += dst_map.row_pitch * dstheight;
6780                         for (y = 0; y < dstheight; ++y)
6781                         {
6782                             sbuf -= src_map.row_pitch;
6783                             dbuf -= dst_map.row_pitch;
6784                             memcpy(dbuf, sbuf, width);
6785                         }
6786                     }
6787                     else
6788                     {
6789                         /* Src and dst overlapping on the same line, use memmove. */
6790                         for (y = 0; y < dstheight; ++y)
6791                         {
6792                             memmove(dbuf, sbuf, width);
6793                             sbuf += src_map.row_pitch;
6794                             dbuf += dst_map.row_pitch;
6795                         }
6796                     }
6797                 }
6798                 else
6799                 {
6800                     /* Stretching in y direction only. */
6801                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6802                     {
6803                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6804                         memcpy(dbuf, sbuf, width);
6805                         dbuf += dst_map.row_pitch;
6806                     }
6807                 }
6808             }
6809             else
6810             {
6811                 /* Stretching in X direction. */
6812                 int last_sy = -1;
6813                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6814                 {
6815                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6816
6817                     if ((sy >> 16) == (last_sy >> 16))
6818                     {
6819                         /* This source row is the same as last source row -
6820                          * Copy the already stretched row. */
6821                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6822                     }
6823                     else
6824                     {
6825 #define STRETCH_ROW(type) \
6826 do { \
6827     const type *s = (const type *)sbuf; \
6828     type *d = (type *)dbuf; \
6829     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6830         d[x] = s[sx >> 16]; \
6831 } while(0)
6832
6833                         switch(bpp)
6834                         {
6835                             case 1:
6836                                 STRETCH_ROW(BYTE);
6837                                 break;
6838                             case 2:
6839                                 STRETCH_ROW(WORD);
6840                                 break;
6841                             case 4:
6842                                 STRETCH_ROW(DWORD);
6843                                 break;
6844                             case 3:
6845                             {
6846                                 const BYTE *s;
6847                                 BYTE *d = dbuf;
6848                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6849                                 {
6850                                     DWORD pixel;
6851
6852                                     s = sbuf + 3 * (sx >> 16);
6853                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6854                                     d[0] = (pixel      ) & 0xff;
6855                                     d[1] = (pixel >>  8) & 0xff;
6856                                     d[2] = (pixel >> 16) & 0xff;
6857                                     d += 3;
6858                                 }
6859                                 break;
6860                             }
6861                             default:
6862                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6863                                 hr = WINED3DERR_NOTAVAILABLE;
6864                                 goto error;
6865                         }
6866 #undef STRETCH_ROW
6867                     }
6868                     dbuf += dst_map.row_pitch;
6869                     last_sy = sy;
6870                 }
6871             }
6872         }
6873         else
6874         {
6875             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6876             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6877             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6878             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6879             {
6880                 /* The color keying flags are checked for correctness in ddraw */
6881                 if (flags & WINEDDBLT_KEYSRC)
6882                 {
6883                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6884                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6885                 }
6886                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6887                 {
6888                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6889                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6890                 }
6891
6892                 if (flags & WINEDDBLT_KEYDEST)
6893                 {
6894                     /* Destination color keys are taken from the source surface! */
6895                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6896                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6897                 }
6898                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6899                 {
6900                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6901                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6902                 }
6903
6904                 if (bpp == 1)
6905                 {
6906                     keymask = 0xff;
6907                 }
6908                 else
6909                 {
6910                     keymask = src_format->red_mask
6911                             | src_format->green_mask
6912                             | src_format->blue_mask;
6913                 }
6914                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6915             }
6916
6917             if (flags & WINEDDBLT_DDFX)
6918             {
6919                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6920                 LONG tmpxy;
6921                 dTopLeft     = dbuf;
6922                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6923                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6924                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6925
6926                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6927                 {
6928                     /* I don't think we need to do anything about this flag */
6929                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6930                 }
6931                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6932                 {
6933                     tmp          = dTopRight;
6934                     dTopRight    = dTopLeft;
6935                     dTopLeft     = tmp;
6936                     tmp          = dBottomRight;
6937                     dBottomRight = dBottomLeft;
6938                     dBottomLeft  = tmp;
6939                     dstxinc = dstxinc * -1;
6940                 }
6941                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6942                 {
6943                     tmp          = dTopLeft;
6944                     dTopLeft     = dBottomLeft;
6945                     dBottomLeft  = tmp;
6946                     tmp          = dTopRight;
6947                     dTopRight    = dBottomRight;
6948                     dBottomRight = tmp;
6949                     dstyinc = dstyinc * -1;
6950                 }
6951                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6952                 {
6953                     /* I don't think we need to do anything about this flag */
6954                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6955                 }
6956                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6957                 {
6958                     tmp          = dBottomRight;
6959                     dBottomRight = dTopLeft;
6960                     dTopLeft     = tmp;
6961                     tmp          = dBottomLeft;
6962                     dBottomLeft  = dTopRight;
6963                     dTopRight    = tmp;
6964                     dstxinc = dstxinc * -1;
6965                     dstyinc = dstyinc * -1;
6966                 }
6967                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6968                 {
6969                     tmp          = dTopLeft;
6970                     dTopLeft     = dBottomLeft;
6971                     dBottomLeft  = dBottomRight;
6972                     dBottomRight = dTopRight;
6973                     dTopRight    = tmp;
6974                     tmpxy   = dstxinc;
6975                     dstxinc = dstyinc;
6976                     dstyinc = tmpxy;
6977                     dstxinc = dstxinc * -1;
6978                 }
6979                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6980                 {
6981                     tmp          = dTopLeft;
6982                     dTopLeft     = dTopRight;
6983                     dTopRight    = dBottomRight;
6984                     dBottomRight = dBottomLeft;
6985                     dBottomLeft  = tmp;
6986                     tmpxy   = dstxinc;
6987                     dstxinc = dstyinc;
6988                     dstyinc = tmpxy;
6989                     dstyinc = dstyinc * -1;
6990                 }
6991                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6992                 {
6993                     /* I don't think we need to do anything about this flag */
6994                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6995                 }
6996                 dbuf = dTopLeft;
6997                 flags &= ~(WINEDDBLT_DDFX);
6998             }
6999
7000 #define COPY_COLORKEY_FX(type) \
7001 do { \
7002     const type *s; \
7003     type *d = (type *)dbuf, *dx, tmp; \
7004     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7005     { \
7006         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7007         dx = d; \
7008         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7009         { \
7010             tmp = s[sx >> 16]; \
7011             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7012                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7013             { \
7014                 dx[0] = tmp; \
7015             } \
7016             dx = (type *)(((BYTE *)dx) + dstxinc); \
7017         } \
7018         d = (type *)(((BYTE *)d) + dstyinc); \
7019     } \
7020 } while(0)
7021
7022             switch (bpp)
7023             {
7024                 case 1:
7025                     COPY_COLORKEY_FX(BYTE);
7026                     break;
7027                 case 2:
7028                     COPY_COLORKEY_FX(WORD);
7029                     break;
7030                 case 4:
7031                     COPY_COLORKEY_FX(DWORD);
7032                     break;
7033                 case 3:
7034                 {
7035                     const BYTE *s;
7036                     BYTE *d = dbuf, *dx;
7037                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7038                     {
7039                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7040                         dx = d;
7041                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7042                         {
7043                             DWORD pixel, dpixel = 0;
7044                             s = sbuf + 3 * (sx>>16);
7045                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7046                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7047                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7048                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7049                             {
7050                                 dx[0] = (pixel      ) & 0xff;
7051                                 dx[1] = (pixel >>  8) & 0xff;
7052                                 dx[2] = (pixel >> 16) & 0xff;
7053                             }
7054                             dx += dstxinc;
7055                         }
7056                         d += dstyinc;
7057                     }
7058                     break;
7059                 }
7060                 default:
7061                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7062                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7063                     hr = WINED3DERR_NOTAVAILABLE;
7064                     goto error;
7065 #undef COPY_COLORKEY_FX
7066             }
7067         }
7068     }
7069
7070 error:
7071     if (flags && FIXME_ON(d3d_surface))
7072     {
7073         FIXME("\tUnsupported flags: %#x.\n", flags);
7074     }
7075
7076 release:
7077     wined3d_surface_unmap(dst_surface);
7078     if (src_surface && src_surface != dst_surface)
7079         wined3d_surface_unmap(src_surface);
7080     /* Release the converted surface, if any. */
7081     if (src_surface && src_surface != orig_src)
7082         wined3d_surface_decref(src_surface);
7083
7084     return hr;
7085 }
7086
7087 /* Do not call while under the GL lock. */
7088 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7089         const RECT *dst_rect, const struct wined3d_color *color)
7090 {
7091     static const RECT src_rect;
7092     WINEDDBLTFX BltFx;
7093
7094     memset(&BltFx, 0, sizeof(BltFx));
7095     BltFx.dwSize = sizeof(BltFx);
7096     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7097     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7098             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7099 }
7100
/* Depth fill entry of the CPU blitter.
 *
 * This is a stub: none of the arguments are used, a FIXME is logged and
 * WINED3DERR_INVALIDCALL is always returned.
 *
 * Do not call while under the GL lock. */
static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
        struct wined3d_surface *surface, const RECT *rect, float depth)
{
    FIXME("Depth filling not implemented by cpu_blit.\n");
    return WINED3DERR_INVALIDCALL;
}
7108
/* Blitter implementation made up of the cpu_blit_*() functions.
 *
 * The initialiser is positional, so the entries have to stay in the member
 * order of struct blit_shader (declared elsewhere). */
const struct blit_shader cpu_blit =  {
    cpu_blit_alloc,
    cpu_blit_free,
    cpu_blit_set,
    cpu_blit_unset,
    cpu_blit_supported,
    cpu_blit_color_fill,    /* Fills through surface_cpu_blt() with WINEDDBLT_COLORFILL. */
    cpu_blit_depth_fill,    /* Stub, always returns WINED3DERR_INVALIDCALL. */
};
7118
7119 static HRESULT surface_init(struct wined3d_surface *surface, enum wined3d_surface_type surface_type, UINT alignment,
7120         UINT width, UINT height, UINT level, enum wined3d_multisample_type multisample_type,
7121         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7122         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7123 {
7124     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7125     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7126     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7127     unsigned int resource_size;
7128     HRESULT hr;
7129
7130     if (multisample_quality > 0)
7131     {
7132         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7133         multisample_quality = 0;
7134     }
7135
7136     /* Quick lockable sanity check.
7137      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7138      * this function is too deep to need to care about things like this.
7139      * Levels need to be checked too, since they all affect what can be done. */
7140     switch (pool)
7141     {
7142         case WINED3D_POOL_SCRATCH:
7143             if (!lockable)
7144             {
7145                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7146                         "which are mutually exclusive, setting lockable to TRUE.\n");
7147                 lockable = TRUE;
7148             }
7149             break;
7150
7151         case WINED3D_POOL_SYSTEM_MEM:
7152             if (!lockable)
7153                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7154             break;
7155
7156         case WINED3D_POOL_MANAGED:
7157             if (usage & WINED3DUSAGE_DYNAMIC)
7158                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7159             break;
7160
7161         case WINED3D_POOL_DEFAULT:
7162             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7163                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7164             break;
7165
7166         default:
7167             FIXME("Unknown pool %#x.\n", pool);
7168             break;
7169     };
7170
7171     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7172         FIXME("Trying to create a render target that isn't in the default pool.\n");
7173
7174     /* FIXME: Check that the format is supported by the device. */
7175
7176     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7177     if (!resource_size)
7178         return WINED3DERR_INVALIDCALL;
7179
7180     surface->surface_type = surface_type;
7181
7182     switch (surface_type)
7183     {
7184         case WINED3D_SURFACE_TYPE_OPENGL:
7185             surface->surface_ops = &surface_ops;
7186             break;
7187
7188         case WINED3D_SURFACE_TYPE_GDI:
7189             surface->surface_ops = &gdi_surface_ops;
7190             break;
7191
7192         default:
7193             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7194             return WINED3DERR_INVALIDCALL;
7195     }
7196
7197     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7198             multisample_type, multisample_quality, usage, pool, width, height, 1,
7199             resource_size, parent, parent_ops, &surface_resource_ops);
7200     if (FAILED(hr))
7201     {
7202         WARN("Failed to initialize resource, returning %#x.\n", hr);
7203         return hr;
7204     }
7205
7206     /* "Standalone" surface. */
7207     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7208
7209     surface->texture_level = level;
7210     list_init(&surface->overlays);
7211
7212     /* Flags */
7213     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7214     if (flags & WINED3D_SURFACE_DISCARD)
7215         surface->flags |= SFLAG_DISCARD;
7216     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7217         surface->flags |= SFLAG_PIN_SYSMEM;
7218     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7219         surface->flags |= SFLAG_LOCKABLE;
7220     /* I'm not sure if this qualifies as a hack or as an optimization. It
7221      * seems reasonable to assume that lockable render targets will get
7222      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7223      * creation. However, the other reason we want to do this is that several
7224      * ddraw applications access surface memory while the surface isn't
7225      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7226      * future locks prevents these from crashing. */
7227     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7228         surface->flags |= SFLAG_DYNLOCK;
7229
7230     /* Mark the texture as dirty so that it gets loaded first time around. */
7231     surface_add_dirty_rect(surface, NULL);
7232     list_init(&surface->renderbuffers);
7233
7234     TRACE("surface %p, memory %p, size %u\n",
7235             surface, surface->resource.allocatedMemory, surface->resource.size);
7236
7237     /* Call the private setup routine */
7238     hr = surface->surface_ops->surface_private_setup(surface);
7239     if (FAILED(hr))
7240     {
7241         ERR("Private setup failed, returning %#x\n", hr);
7242         surface_cleanup(surface);
7243         return hr;
7244     }
7245
7246     /* Similar to lockable rendertargets above, creating the DIB section
7247      * during surface initialization prevents the sysmem pointer from changing
7248      * after a wined3d_surface_getdc() call. */
7249     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7250             && SUCCEEDED(surface_create_dib_section(surface)))
7251     {
7252         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7253         surface->resource.heapMemory = NULL;
7254         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7255     }
7256
7257     return hr;
7258 }
7259
7260 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7261         enum wined3d_format_id format_id, UINT level, DWORD usage, enum wined3d_pool pool,
7262         enum wined3d_multisample_type multisample_type, DWORD multisample_quality,
7263         enum wined3d_surface_type surface_type, DWORD flags, void *parent,
7264         const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7265 {
7266     struct wined3d_surface *object;
7267     HRESULT hr;
7268
7269     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7270             device, width, height, debug_d3dformat(format_id), level);
7271     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7272             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7273     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7274
7275     if (surface_type == WINED3D_SURFACE_TYPE_OPENGL && !device->adapter)
7276     {
7277         ERR("OpenGL surfaces are not available without OpenGL.\n");
7278         return WINED3DERR_NOTAVAILABLE;
7279     }
7280
7281     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7282     if (!object)
7283     {
7284         ERR("Failed to allocate surface memory.\n");
7285         return WINED3DERR_OUTOFVIDEOMEMORY;
7286     }
7287
7288     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7289             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7290     if (FAILED(hr))
7291     {
7292         WARN("Failed to initialize surface, returning %#x.\n", hr);
7293         HeapFree(GetProcessHeap(), 0, object);
7294         return hr;
7295     }
7296
7297     TRACE("Created surface %p.\n", object);
7298     *surface = object;
7299
7300     return hr;
7301 }