dsound: Always enumerate the default device first.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         /* Release the DC. */
101         SelectObject(surface->hDC, surface->dib.holdbitmap);
102         DeleteDC(surface->hDC);
103         /* Release the DIB section. */
104         DeleteObject(surface->dib.DIBsection);
105         surface->dib.bitmap_data = NULL;
106         surface->resource.allocatedMemory = NULL;
107     }
108
109     if (surface->flags & SFLAG_USERPTR)
110         wined3d_surface_set_mem(surface, NULL);
111     if (surface->overlay_dest)
112         list_remove(&surface->overlay_entry);
113
114     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
115     {
116         list_remove(&overlay->overlay_entry);
117         overlay->overlay_dest = NULL;
118     }
119
120     HeapFree(GetProcessHeap(), 0, surface->palette9);
121
122     resource_cleanup(&surface->resource);
123 }
124
125 void surface_update_draw_binding(struct wined3d_surface *surface)
126 {
127     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
128         surface->draw_binding = SFLAG_INDRAWABLE;
129     else if (surface->resource.multisample_type)
130         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
131     else
132         surface->draw_binding = SFLAG_INTEXTURE;
133 }
134
135 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
136 {
137     TRACE("surface %p, container %p.\n", surface, container);
138
139     if (!container && type != WINED3D_CONTAINER_NONE)
140         ERR("Setting NULL container of type %#x.\n", type);
141
142     if (type == WINED3D_CONTAINER_SWAPCHAIN)
143     {
144         surface->get_drawable_size = get_drawable_size_swapchain;
145     }
146     else
147     {
148         switch (wined3d_settings.offscreen_rendering_mode)
149         {
150             case ORM_FBO:
151                 surface->get_drawable_size = get_drawable_size_fbo;
152                 break;
153
154             case ORM_BACKBUFFER:
155                 surface->get_drawable_size = get_drawable_size_backbuffer;
156                 break;
157
158             default:
159                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
160                 return;
161         }
162     }
163
164     surface->container.type = type;
165     surface->container.u.base = container;
166     surface_update_draw_binding(surface);
167 }
168
169 struct blt_info
170 {
171     GLenum binding;
172     GLenum bind_target;
173     enum tex_types tex_type;
174     GLfloat coords[4][3];
175 };
176
/* A rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
184
185 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
186 {
187     f->l = ((r->left * 2.0f) / w) - 1.0f;
188     f->t = ((r->top * 2.0f) / h) - 1.0f;
189     f->r = ((r->right * 2.0f) / w) - 1.0f;
190     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
191 }
192
193 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
194 {
195     GLfloat (*coords)[3] = info->coords;
196     struct float_rect f;
197
198     switch (target)
199     {
200         default:
201             FIXME("Unsupported texture target %#x\n", target);
202             /* Fall back to GL_TEXTURE_2D */
203         case GL_TEXTURE_2D:
204             info->binding = GL_TEXTURE_BINDING_2D;
205             info->bind_target = GL_TEXTURE_2D;
206             info->tex_type = tex_2d;
207             coords[0][0] = (float)rect->left / w;
208             coords[0][1] = (float)rect->top / h;
209             coords[0][2] = 0.0f;
210
211             coords[1][0] = (float)rect->right / w;
212             coords[1][1] = (float)rect->top / h;
213             coords[1][2] = 0.0f;
214
215             coords[2][0] = (float)rect->left / w;
216             coords[2][1] = (float)rect->bottom / h;
217             coords[2][2] = 0.0f;
218
219             coords[3][0] = (float)rect->right / w;
220             coords[3][1] = (float)rect->bottom / h;
221             coords[3][2] = 0.0f;
222             break;
223
224         case GL_TEXTURE_RECTANGLE_ARB:
225             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
226             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
227             info->tex_type = tex_rect;
228             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
229             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
230             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
231             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
232             break;
233
234         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
235             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
236             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
237             info->tex_type = tex_cube;
238             cube_coords_float(rect, w, h, &f);
239
240             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
241             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
242             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
243             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
244             break;
245
246         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
247             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
248             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
249             info->tex_type = tex_cube;
250             cube_coords_float(rect, w, h, &f);
251
252             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
253             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
254             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
255             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
256             break;
257
258         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
259             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
260             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
261             info->tex_type = tex_cube;
262             cube_coords_float(rect, w, h, &f);
263
264             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
265             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
266             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
267             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
268             break;
269
270         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
271             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
272             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
273             info->tex_type = tex_cube;
274             cube_coords_float(rect, w, h, &f);
275
276             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
277             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
278             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
279             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
280             break;
281
282         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
283             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
284             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
285             info->tex_type = tex_cube;
286             cube_coords_float(rect, w, h, &f);
287
288             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
289             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
290             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
291             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
292             break;
293
294         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
295             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
296             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
297             info->tex_type = tex_cube;
298             cube_coords_float(rect, w, h, &f);
299
300             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
301             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
302             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
303             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
304             break;
305     }
306 }
307
308 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
309 {
310     if (rect_in)
311         *rect_out = *rect_in;
312     else
313     {
314         rect_out->left = 0;
315         rect_out->top = 0;
316         rect_out->right = surface->resource.width;
317         rect_out->bottom = surface->resource.height;
318     }
319 }
320
321 /* GL locking and context activation is done by the caller */
322 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
323         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
324 {
325     struct blt_info info;
326
327     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
328
329     glEnable(info.bind_target);
330     checkGLcall("glEnable(bind_target)");
331
332     context_bind_texture(context, info.bind_target, src_surface->texture_name);
333
334     /* Filtering for StretchRect */
335     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
336             wined3d_gl_mag_filter(magLookup, Filter));
337     checkGLcall("glTexParameteri");
338     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
339             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
340     checkGLcall("glTexParameteri");
341     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
342     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
343     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
344         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
345     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
346     checkGLcall("glTexEnvi");
347
348     /* Draw a quad */
349     glBegin(GL_TRIANGLE_STRIP);
350     glTexCoord3fv(info.coords[0]);
351     glVertex2i(dst_rect->left, dst_rect->top);
352
353     glTexCoord3fv(info.coords[1]);
354     glVertex2i(dst_rect->right, dst_rect->top);
355
356     glTexCoord3fv(info.coords[2]);
357     glVertex2i(dst_rect->left, dst_rect->bottom);
358
359     glTexCoord3fv(info.coords[3]);
360     glVertex2i(dst_rect->right, dst_rect->bottom);
361     glEnd();
362
363     /* Unbind the texture */
364     context_bind_texture(context, info.bind_target, 0);
365
366     /* We changed the filtering settings on the texture. Inform the
367      * container about this to get the filters reset properly next draw. */
368     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
369     {
370         struct wined3d_texture *texture = src_surface->container.u.texture;
371         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
372         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
373         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
374         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
375     }
376 }
377
378 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
379 {
380     const struct wined3d_format *format = surface->resource.format;
381     SYSTEM_INFO sysInfo;
382     BITMAPINFO *b_info;
383     int extraline = 0;
384     DWORD *masks;
385     UINT usage;
386     HDC dc;
387
388     TRACE("surface %p.\n", surface);
389
390     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
391     {
392         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
393         return WINED3DERR_INVALIDCALL;
394     }
395
396     switch (format->byte_count)
397     {
398         case 2:
399         case 4:
400             /* Allocate extra space to store the RGB bit masks. */
401             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
402             break;
403
404         case 3:
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
406             break;
407
408         default:
409             /* Allocate extra space for a palette. */
410             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
411                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
412             break;
413     }
414
415     if (!b_info)
416         return E_OUTOFMEMORY;
417
418     /* Some applications access the surface in via DWORDs, and do not take
419      * the necessary care at the end of the surface. So we need at least
420      * 4 extra bytes at the end of the surface. Check against the page size,
421      * if the last page used for the surface has at least 4 spare bytes we're
422      * safe, otherwise add an extra line to the DIB section. */
423     GetSystemInfo(&sysInfo);
424     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
425     {
426         extraline = 1;
427         TRACE("Adding an extra line to the DIB section.\n");
428     }
429
430     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
431     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
432     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
433     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
434     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
435             * wined3d_surface_get_pitch(surface);
436     b_info->bmiHeader.biPlanes = 1;
437     b_info->bmiHeader.biBitCount = format->byte_count * 8;
438
439     b_info->bmiHeader.biXPelsPerMeter = 0;
440     b_info->bmiHeader.biYPelsPerMeter = 0;
441     b_info->bmiHeader.biClrUsed = 0;
442     b_info->bmiHeader.biClrImportant = 0;
443
444     /* Get the bit masks */
445     masks = (DWORD *)b_info->bmiColors;
446     switch (surface->resource.format->id)
447     {
448         case WINED3DFMT_B8G8R8_UNORM:
449             usage = DIB_RGB_COLORS;
450             b_info->bmiHeader.biCompression = BI_RGB;
451             break;
452
453         case WINED3DFMT_B5G5R5X1_UNORM:
454         case WINED3DFMT_B5G5R5A1_UNORM:
455         case WINED3DFMT_B4G4R4A4_UNORM:
456         case WINED3DFMT_B4G4R4X4_UNORM:
457         case WINED3DFMT_B2G3R3_UNORM:
458         case WINED3DFMT_B2G3R3A8_UNORM:
459         case WINED3DFMT_R10G10B10A2_UNORM:
460         case WINED3DFMT_R8G8B8A8_UNORM:
461         case WINED3DFMT_R8G8B8X8_UNORM:
462         case WINED3DFMT_B10G10R10A2_UNORM:
463         case WINED3DFMT_B5G6R5_UNORM:
464         case WINED3DFMT_R16G16B16A16_UNORM:
465             usage = 0;
466             b_info->bmiHeader.biCompression = BI_BITFIELDS;
467             masks[0] = format->red_mask;
468             masks[1] = format->green_mask;
469             masks[2] = format->blue_mask;
470             break;
471
472         default:
473             /* Don't know palette */
474             b_info->bmiHeader.biCompression = BI_RGB;
475             usage = 0;
476             break;
477     }
478
479     if (!(dc = GetDC(0)))
480     {
481         HeapFree(GetProcessHeap(), 0, b_info);
482         return HRESULT_FROM_WIN32(GetLastError());
483     }
484
485     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
486             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
487             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
488     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
489     ReleaseDC(0, dc);
490
491     if (!surface->dib.DIBsection)
492     {
493         ERR("Failed to create DIB section.\n");
494         HeapFree(GetProcessHeap(), 0, b_info);
495         return HRESULT_FROM_WIN32(GetLastError());
496     }
497
498     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
499     /* Copy the existing surface to the dib section. */
500     if (surface->resource.allocatedMemory)
501     {
502         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
503                 surface->resource.height * wined3d_surface_get_pitch(surface));
504     }
505     else
506     {
507         /* This is to make maps read the GL texture although memory is allocated. */
508         surface->flags &= ~SFLAG_INSYSMEM;
509     }
510     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
511
512     HeapFree(GetProcessHeap(), 0, b_info);
513
514     /* Now allocate a DC. */
515     surface->hDC = CreateCompatibleDC(0);
516     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
517     TRACE("Using wined3d palette %p.\n", surface->palette);
518     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
519
520     surface->flags |= SFLAG_DIBSECTION;
521
522     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
523     surface->resource.heapMemory = NULL;
524
525     return WINED3D_OK;
526 }
527
528 static void surface_prepare_system_memory(struct wined3d_surface *surface)
529 {
530     struct wined3d_device *device = surface->resource.device;
531     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
532
533     TRACE("surface %p.\n", surface);
534
535     /* Performance optimization: Count how often a surface is locked, if it is
536      * locked regularly do not throw away the system memory copy. This avoids
537      * the need to download the surface from OpenGL all the time. The surface
538      * is still downloaded if the OpenGL texture is changed. */
539     if (!(surface->flags & SFLAG_DYNLOCK))
540     {
541         if (++surface->lockCount > MAXLOCKCOUNT)
542         {
543             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
544             surface->flags |= SFLAG_DYNLOCK;
545         }
546     }
547
548     /* Create a PBO for dynamically locked surfaces but don't do it for
549      * converted or NPOT surfaces. Also don't create a PBO for systemmem
550      * surfaces. */
551     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
552             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
553             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
554     {
555         struct wined3d_context *context;
556         GLenum error;
557
558         context = context_acquire(device, NULL);
559         ENTER_GL();
560
561         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
562         error = glGetError();
563         if (!surface->pbo || error != GL_NO_ERROR)
564             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
565
566         TRACE("Binding PBO %u.\n", surface->pbo);
567
568         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
569         checkGLcall("glBindBufferARB");
570
571         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
572                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
573         checkGLcall("glBufferDataARB");
574
575         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
576         checkGLcall("glBindBufferARB");
577
578         /* We don't need the system memory anymore and we can't even use it for PBOs. */
579         if (!(surface->flags & SFLAG_CLIENT))
580         {
581             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
582             surface->resource.heapMemory = NULL;
583         }
584         surface->resource.allocatedMemory = NULL;
585         surface->flags |= SFLAG_PBO;
586         LEAVE_GL();
587         context_release(context);
588     }
589     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
590     {
591         /* Whatever surface we have, make sure that there is memory allocated
592          * for the downloaded copy, or a PBO to map. */
593         if (!surface->resource.heapMemory)
594             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
595
596         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
597                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
598
599         if (surface->flags & SFLAG_INSYSMEM)
600             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
601     }
602 }
603
604 static void surface_evict_sysmem(struct wined3d_surface *surface)
605 {
606     if (surface->flags & SFLAG_DONOTFREE)
607         return;
608
609     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
610     surface->resource.allocatedMemory = NULL;
611     surface->resource.heapMemory = NULL;
612     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
613 }
614
615 /* Context activation is done by the caller. */
616 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
617         struct wined3d_context *context, BOOL srgb)
618 {
619     struct wined3d_device *device = surface->resource.device;
620     DWORD active_sampler;
621
622     /* We don't need a specific texture unit, but after binding the texture
623      * the current unit is dirty. Read the unit back instead of switching to
624      * 0, this avoids messing around with the state manager's GL states. The
625      * current texture unit should always be a valid one.
626      *
627      * To be more specific, this is tricky because we can implicitly be
628      * called from sampler() in state.c. This means we can't touch anything
629      * other than whatever happens to be the currently active texture, or we
630      * would risk marking already applied sampler states dirty again. */
631     active_sampler = device->rev_tex_unit_map[context->active_texture];
632
633     if (active_sampler != WINED3D_UNMAPPED_STAGE)
634         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
635     surface_bind(surface, context, srgb);
636 }
637
638 static void surface_force_reload(struct wined3d_surface *surface)
639 {
640     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
641 }
642
643 static void surface_release_client_storage(struct wined3d_surface *surface)
644 {
645     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
646
647     ENTER_GL();
648     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
649     if (surface->texture_name)
650     {
651         surface_bind_and_dirtify(surface, context, FALSE);
652         glTexImage2D(surface->texture_target, surface->texture_level,
653                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
654     }
655     if (surface->texture_name_srgb)
656     {
657         surface_bind_and_dirtify(surface, context, TRUE);
658         glTexImage2D(surface->texture_target, surface->texture_level,
659                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
660     }
661     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
662     LEAVE_GL();
663
664     context_release(context);
665
666     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
667     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
668     surface_force_reload(surface);
669 }
670
671 static HRESULT surface_private_setup(struct wined3d_surface *surface)
672 {
673     /* TODO: Check against the maximum texture sizes supported by the video card. */
674     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
675     unsigned int pow2Width, pow2Height;
676
677     TRACE("surface %p.\n", surface);
678
679     surface->texture_name = 0;
680     surface->texture_target = GL_TEXTURE_2D;
681
682     /* Non-power2 support */
683     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
684     {
685         pow2Width = surface->resource.width;
686         pow2Height = surface->resource.height;
687     }
688     else
689     {
690         /* Find the nearest pow2 match */
691         pow2Width = pow2Height = 1;
692         while (pow2Width < surface->resource.width)
693             pow2Width <<= 1;
694         while (pow2Height < surface->resource.height)
695             pow2Height <<= 1;
696     }
697     surface->pow2Width = pow2Width;
698     surface->pow2Height = pow2Height;
699
700     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
701     {
702         /* TODO: Add support for non power two compressed textures. */
703         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
704         {
705             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
706                   surface, surface->resource.width, surface->resource.height);
707             return WINED3DERR_NOTAVAILABLE;
708         }
709     }
710
711     if (pow2Width != surface->resource.width
712             || pow2Height != surface->resource.height)
713     {
714         surface->flags |= SFLAG_NONPOW2;
715     }
716
717     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
718             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
719     {
720         /* One of three options:
721          * 1: Do the same as we do with NPOT and scale the texture, (any
722          *    texture ops would require the texture to be scaled which is
723          *    potentially slow)
724          * 2: Set the texture to the maximum size (bad idea).
725          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
726          * 4: Create the surface, but allow it to be used only for DirectDraw
727          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
728          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
729          *    the render target. */
730         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
731         {
732             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
733             return WINED3DERR_NOTAVAILABLE;
734         }
735
736         /* We should never use this surface in combination with OpenGL! */
737         TRACE("Creating an oversized surface: %ux%u.\n",
738                 surface->pow2Width, surface->pow2Height);
739     }
740     else
741     {
742         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
743          * and EXT_PALETTED_TEXTURE is used in combination with texture
744          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
745          * EXT_PALETTED_TEXTURE doesn't work in combination with
746          * ARB_TEXTURE_RECTANGLE. */
747         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
748                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
749                 && gl_info->supported[EXT_PALETTED_TEXTURE]
750                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
751         {
752             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
753             surface->pow2Width = surface->resource.width;
754             surface->pow2Height = surface->resource.height;
755             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
756         }
757     }
758
759     switch (wined3d_settings.offscreen_rendering_mode)
760     {
761         case ORM_FBO:
762             surface->get_drawable_size = get_drawable_size_fbo;
763             break;
764
765         case ORM_BACKBUFFER:
766             surface->get_drawable_size = get_drawable_size_backbuffer;
767             break;
768
769         default:
770             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
771             return WINED3DERR_INVALIDCALL;
772     }
773
774     surface->flags |= SFLAG_INSYSMEM;
775
776     return WINED3D_OK;
777 }
778
779 static void surface_realize_palette(struct wined3d_surface *surface)
780 {
781     struct wined3d_palette *palette = surface->palette;
782
783     TRACE("surface %p.\n", surface);
784
785     if (!palette) return;
786
787     if (surface->resource.format->id == WINED3DFMT_P8_UINT
788             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
789     {
790         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
791         {
792             /* Make sure the texture is up to date. This call doesn't do
793              * anything if the texture is already up to date. */
794             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
795
796             /* We want to force a palette refresh, so mark the drawable as not being up to date */
797             if (!surface_is_offscreen(surface))
798                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
799         }
800         else
801         {
802             if (!(surface->flags & SFLAG_INSYSMEM))
803             {
804                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
805                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
806             }
807             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
808         }
809     }
810
811     if (surface->flags & SFLAG_DIBSECTION)
812     {
813         RGBQUAD col[256];
814         unsigned int i;
815
816         TRACE("Updating the DC's palette.\n");
817
818         for (i = 0; i < 256; ++i)
819         {
820             col[i].rgbRed   = palette->palents[i].peRed;
821             col[i].rgbGreen = palette->palents[i].peGreen;
822             col[i].rgbBlue  = palette->palents[i].peBlue;
823             col[i].rgbReserved = 0;
824         }
825         SetDIBColorTable(surface->hDC, 0, 256, col);
826     }
827
828     /* Propagate the changes to the drawable when we have a palette. */
829     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
830         surface_load_location(surface, surface->draw_binding, NULL);
831 }
832
833 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
834 {
835     HRESULT hr;
836
837     /* If there's no destination surface there is nothing to do. */
838     if (!surface->overlay_dest)
839         return WINED3D_OK;
840
841     /* Blt calls ModifyLocation on the dest surface, which in turn calls
842      * DrawOverlay to update the overlay. Prevent an endless recursion. */
843     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
844         return WINED3D_OK;
845
846     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
847     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
848             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
849     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
850
851     return hr;
852 }
853
854 static void surface_preload(struct wined3d_surface *surface)
855 {
856     TRACE("surface %p.\n", surface);
857
858     surface_internal_preload(surface, SRGB_ANY);
859 }
860
861 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
862 {
863     struct wined3d_device *device = surface->resource.device;
864     const RECT *pass_rect = rect;
865
866     TRACE("surface %p, rect %s, flags %#x.\n",
867             surface, wine_dbgstr_rect(rect), flags);
868
869     if (flags & WINED3DLOCK_DISCARD)
870     {
871         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
872         surface_prepare_system_memory(surface);
873         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
874     }
875     else
876     {
877         /* surface_load_location() does not check if the rectangle specifies
878          * the full surface. Most callers don't need that, so do it here. */
879         if (rect && !rect->top && !rect->left
880                 && rect->right == surface->resource.width
881                 && rect->bottom == surface->resource.height)
882             pass_rect = NULL;
883
884         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
885                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
886                 || surface == device->fb.render_targets[0])))
887             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
888     }
889
890     if (surface->flags & SFLAG_PBO)
891     {
892         const struct wined3d_gl_info *gl_info;
893         struct wined3d_context *context;
894
895         context = context_acquire(device, NULL);
896         gl_info = context->gl_info;
897
898         ENTER_GL();
899         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
900         checkGLcall("glBindBufferARB");
901
902         /* This shouldn't happen but could occur if some other function
903          * didn't handle the PBO properly. */
904         if (surface->resource.allocatedMemory)
905             ERR("The surface already has PBO memory allocated.\n");
906
907         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
908         checkGLcall("glMapBufferARB");
909
910         /* Make sure the PBO isn't set anymore in order not to break non-PBO
911          * calls. */
912         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
913         checkGLcall("glBindBufferARB");
914
915         LEAVE_GL();
916         context_release(context);
917     }
918
919     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
920     {
921         if (!rect)
922             surface_add_dirty_rect(surface, NULL);
923         else
924         {
925             WINED3DBOX b;
926
927             b.Left = rect->left;
928             b.Top = rect->top;
929             b.Right = rect->right;
930             b.Bottom = rect->bottom;
931             b.Front = 0;
932             b.Back = 1;
933             surface_add_dirty_rect(surface, &b);
934         }
935     }
936 }
937
938 static void surface_unmap(struct wined3d_surface *surface)
939 {
940     struct wined3d_device *device = surface->resource.device;
941     BOOL fullsurface;
942
943     TRACE("surface %p.\n", surface);
944
945     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
946
947     if (surface->flags & SFLAG_PBO)
948     {
949         const struct wined3d_gl_info *gl_info;
950         struct wined3d_context *context;
951
952         TRACE("Freeing PBO memory.\n");
953
954         context = context_acquire(device, NULL);
955         gl_info = context->gl_info;
956
957         ENTER_GL();
958         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
959         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
960         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
961         checkGLcall("glUnmapBufferARB");
962         LEAVE_GL();
963         context_release(context);
964
965         surface->resource.allocatedMemory = NULL;
966     }
967
968     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
969
970     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
971     {
972         TRACE("Not dirtified, nothing to do.\n");
973         goto done;
974     }
975
976     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
977             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
978     {
979         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
980         {
981             static BOOL warned = FALSE;
982             if (!warned)
983             {
984                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
985                 warned = TRUE;
986             }
987             goto done;
988         }
989
990         if (!surface->dirtyRect.left && !surface->dirtyRect.top
991                 && surface->dirtyRect.right == surface->resource.width
992                 && surface->dirtyRect.bottom == surface->resource.height)
993         {
994             fullsurface = TRUE;
995         }
996         else
997         {
998             /* TODO: Proper partial rectangle tracking. */
999             fullsurface = FALSE;
1000             surface->flags |= SFLAG_INSYSMEM;
1001         }
1002
1003         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
1004
1005         /* Partial rectangle tracking is not commonly implemented, it is only
1006          * done for render targets. INSYSMEM was set before to tell
1007          * surface_load_location() where to read the rectangle from.
1008          * Indrawable is set because all modifications from the partial
1009          * sysmem copy are written back to the drawable, thus the surface is
1010          * merged again in the drawable. The sysmem copy is not fully up to
1011          * date because only a subrectangle was read in Map(). */
1012         if (!fullsurface)
1013         {
1014             surface_modify_location(surface, surface->draw_binding, TRUE);
1015             surface_evict_sysmem(surface);
1016         }
1017
1018         surface->dirtyRect.left = surface->resource.width;
1019         surface->dirtyRect.top = surface->resource.height;
1020         surface->dirtyRect.right = 0;
1021         surface->dirtyRect.bottom = 0;
1022     }
1023     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1024     {
1025         FIXME("Depth / stencil buffer locking is not implemented.\n");
1026     }
1027
1028 done:
1029     /* Overlays have to be redrawn manually after changes with the GL implementation */
1030     if (surface->overlay_dest)
1031         surface->surface_ops->surface_draw_overlay(surface);
1032 }
1033
1034 static HRESULT surface_getdc(struct wined3d_surface *surface)
1035 {
1036     WINED3DLOCKED_RECT lock;
1037     HRESULT hr;
1038
1039     TRACE("surface %p.\n", surface);
1040
1041     /* Create a DIB section if there isn't a dc yet. */
1042     if (!surface->hDC)
1043     {
1044         if (surface->flags & SFLAG_CLIENT)
1045         {
1046             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1047             surface_release_client_storage(surface);
1048         }
1049         hr = surface_create_dib_section(surface);
1050         if (FAILED(hr))
1051             return WINED3DERR_INVALIDCALL;
1052
1053         /* Use the DIB section from now on if we are not using a PBO. */
1054         if (!(surface->flags & SFLAG_PBO))
1055             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1056     }
1057
1058     /* Map the surface. */
1059     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1060     if (FAILED(hr))
1061         ERR("Map failed, hr %#x.\n", hr);
1062
1063     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1064      * activates the allocatedMemory. */
1065     if (surface->flags & SFLAG_PBO)
1066         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
1067
1068     return hr;
1069 }
1070
1071 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1072 {
1073     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1074         return FALSE;
1075     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1076         return FALSE;
1077     return TRUE;
1078 }
1079
1080 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1081         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1082 {
1083     const struct wined3d_gl_info *gl_info;
1084     struct wined3d_context *context;
1085     DWORD src_mask, dst_mask;
1086     GLbitfield gl_mask;
1087
1088     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1089             device, src_surface, wine_dbgstr_rect(src_rect),
1090             dst_surface, wine_dbgstr_rect(dst_rect));
1091
1092     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1093     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1094
1095     if (src_mask != dst_mask)
1096     {
1097         ERR("Incompatible formats %s and %s.\n",
1098                 debug_d3dformat(src_surface->resource.format->id),
1099                 debug_d3dformat(dst_surface->resource.format->id));
1100         return;
1101     }
1102
1103     if (!src_mask)
1104     {
1105         ERR("Not a depth / stencil format: %s.\n",
1106                 debug_d3dformat(src_surface->resource.format->id));
1107         return;
1108     }
1109
1110     gl_mask = 0;
1111     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1112         gl_mask |= GL_DEPTH_BUFFER_BIT;
1113     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1114         gl_mask |= GL_STENCIL_BUFFER_BIT;
1115
1116     /* Make sure the locations are up-to-date. Loading the destination
1117      * surface isn't required if the entire surface is overwritten. */
1118     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1119     if (!surface_is_full_rect(dst_surface, dst_rect))
1120         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1121
1122     context = context_acquire(device, NULL);
1123     if (!context->valid)
1124     {
1125         context_release(context);
1126         WARN("Invalid context, skipping blit.\n");
1127         return;
1128     }
1129
1130     gl_info = context->gl_info;
1131
1132     ENTER_GL();
1133
1134     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1135     glReadBuffer(GL_NONE);
1136     checkGLcall("glReadBuffer()");
1137     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1138
1139     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1140     context_set_draw_buffer(context, GL_NONE);
1141     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1142
1143     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1144     {
1145         glDepthMask(GL_TRUE);
1146         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1147     }
1148     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1149     {
1150         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1151         {
1152             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1153             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1154         }
1155         glStencilMask(~0U);
1156         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1157     }
1158
1159     glDisable(GL_SCISSOR_TEST);
1160     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1161
1162     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1163             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1164     checkGLcall("glBlitFramebuffer()");
1165
1166     LEAVE_GL();
1167
1168     if (wined3d_settings.strict_draw_ordering)
1169         wglFlush(); /* Flush to ensure ordering across contexts. */
1170
1171     context_release(context);
1172 }
1173
1174 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1175  * Depth / stencil is not supported. */
1176 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1177         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1178         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1179 {
1180     const struct wined3d_gl_info *gl_info;
1181     struct wined3d_context *context;
1182     RECT src_rect, dst_rect;
1183     GLenum gl_filter;
1184     GLenum buffer;
1185
1186     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1187     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1188             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1189     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1190             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1191
1192     src_rect = *src_rect_in;
1193     dst_rect = *dst_rect_in;
1194
1195     switch (filter)
1196     {
1197         case WINED3DTEXF_LINEAR:
1198             gl_filter = GL_LINEAR;
1199             break;
1200
1201         default:
1202             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1203         case WINED3DTEXF_NONE:
1204         case WINED3DTEXF_POINT:
1205             gl_filter = GL_NEAREST;
1206             break;
1207     }
1208
1209     /* Resolve the source surface first if needed. */
1210     if (src_location == SFLAG_INRB_MULTISAMPLE
1211             && (src_surface->resource.format->id != dst_surface->resource.format->id
1212                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1213                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1214         src_location = SFLAG_INRB_RESOLVED;
1215
1216     /* Make sure the locations are up-to-date. Loading the destination
1217      * surface isn't required if the entire surface is overwritten. (And is
1218      * in fact harmful if we're being called by surface_load_location() with
1219      * the purpose of loading the destination surface.) */
1220     surface_load_location(src_surface, src_location, NULL);
1221     if (!surface_is_full_rect(dst_surface, &dst_rect))
1222         surface_load_location(dst_surface, dst_location, NULL);
1223
1224     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1225     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1226     else context = context_acquire(device, NULL);
1227
1228     if (!context->valid)
1229     {
1230         context_release(context);
1231         WARN("Invalid context, skipping blit.\n");
1232         return;
1233     }
1234
1235     gl_info = context->gl_info;
1236
1237     if (src_location == SFLAG_INDRAWABLE)
1238     {
1239         TRACE("Source surface %p is onscreen.\n", src_surface);
1240         buffer = surface_get_gl_buffer(src_surface);
1241         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1242     }
1243     else
1244     {
1245         TRACE("Source surface %p is offscreen.\n", src_surface);
1246         buffer = GL_COLOR_ATTACHMENT0;
1247     }
1248
1249     ENTER_GL();
1250     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1251     glReadBuffer(buffer);
1252     checkGLcall("glReadBuffer()");
1253     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1254     LEAVE_GL();
1255
1256     if (dst_location == SFLAG_INDRAWABLE)
1257     {
1258         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1259         buffer = surface_get_gl_buffer(dst_surface);
1260         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1261     }
1262     else
1263     {
1264         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1265         buffer = GL_COLOR_ATTACHMENT0;
1266     }
1267
1268     ENTER_GL();
1269     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1270     context_set_draw_buffer(context, buffer);
1271     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1272     context_invalidate_state(context, STATE_FRAMEBUFFER);
1273
1274     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1275     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1276     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1277     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1278     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1279
1280     glDisable(GL_SCISSOR_TEST);
1281     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1282
1283     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1284             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1285     checkGLcall("glBlitFramebuffer()");
1286
1287     LEAVE_GL();
1288
1289     if (wined3d_settings.strict_draw_ordering
1290             || (dst_location == SFLAG_INDRAWABLE
1291             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1292         wglFlush();
1293
1294     context_release(context);
1295 }
1296
1297 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1298         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1299         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1300 {
1301     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1302         return FALSE;
1303
1304     /* Source and/or destination need to be on the GL side */
1305     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1306         return FALSE;
1307
1308     switch (blit_op)
1309     {
1310         case WINED3D_BLIT_OP_COLOR_BLIT:
1311             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1312                 return FALSE;
1313             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1314                 return FALSE;
1315             break;
1316
1317         case WINED3D_BLIT_OP_DEPTH_BLIT:
1318             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1319                 return FALSE;
1320             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1321                 return FALSE;
1322             break;
1323
1324         default:
1325             return FALSE;
1326     }
1327
1328     if (!(src_format->id == dst_format->id
1329             || (is_identity_fixup(src_format->color_fixup)
1330             && is_identity_fixup(dst_format->color_fixup))))
1331         return FALSE;
1332
1333     return TRUE;
1334 }
1335
1336 /* This function checks if the primary render target uses the 8bit paletted format. */
1337 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1338 {
1339     if (device->fb.render_targets && device->fb.render_targets[0])
1340     {
1341         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1342         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1343                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1344             return TRUE;
1345     }
1346     return FALSE;
1347 }
1348
1349 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1350         DWORD color, WINED3DCOLORVALUE *float_color)
1351 {
1352     const struct wined3d_format *format = surface->resource.format;
1353     const struct wined3d_device *device = surface->resource.device;
1354
1355     switch (format->id)
1356     {
1357         case WINED3DFMT_P8_UINT:
1358             if (surface->palette)
1359             {
1360                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1361                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1362                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1363             }
1364             else
1365             {
1366                 float_color->r = 0.0f;
1367                 float_color->g = 0.0f;
1368                 float_color->b = 0.0f;
1369             }
1370             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1371             break;
1372
1373         case WINED3DFMT_B5G6R5_UNORM:
1374             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1375             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1376             float_color->b = (color & 0x1f) / 31.0f;
1377             float_color->a = 1.0f;
1378             break;
1379
1380         case WINED3DFMT_B8G8R8_UNORM:
1381         case WINED3DFMT_B8G8R8X8_UNORM:
1382             float_color->r = D3DCOLOR_R(color);
1383             float_color->g = D3DCOLOR_G(color);
1384             float_color->b = D3DCOLOR_B(color);
1385             float_color->a = 1.0f;
1386             break;
1387
1388         case WINED3DFMT_B8G8R8A8_UNORM:
1389             float_color->r = D3DCOLOR_R(color);
1390             float_color->g = D3DCOLOR_G(color);
1391             float_color->b = D3DCOLOR_B(color);
1392             float_color->a = D3DCOLOR_A(color);
1393             break;
1394
1395         default:
1396             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1397             return FALSE;
1398     }
1399
1400     return TRUE;
1401 }
1402
1403 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1404 {
1405     const struct wined3d_format *format = surface->resource.format;
1406
1407     switch (format->id)
1408     {
1409         case WINED3DFMT_S1_UINT_D15_UNORM:
1410             *float_depth = depth / (float)0x00007fff;
1411             break;
1412
1413         case WINED3DFMT_D16_UNORM:
1414             *float_depth = depth / (float)0x0000ffff;
1415             break;
1416
1417         case WINED3DFMT_D24_UNORM_S8_UINT:
1418         case WINED3DFMT_X8D24_UNORM:
1419             *float_depth = depth / (float)0x00ffffff;
1420             break;
1421
1422         case WINED3DFMT_D32_UNORM:
1423             *float_depth = depth / (float)0xffffffff;
1424             break;
1425
1426         default:
1427             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1428             return FALSE;
1429     }
1430
1431     return TRUE;
1432 }
1433
1434 /* Do not call while under the GL lock. */
1435 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1436 {
1437     const struct wined3d_resource *resource = &surface->resource;
1438     struct wined3d_device *device = resource->device;
1439     const struct blit_shader *blitter;
1440
1441     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1442             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1443     if (!blitter)
1444     {
1445         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1446         return WINED3DERR_INVALIDCALL;
1447     }
1448
1449     return blitter->depth_fill(device, surface, rect, depth);
1450 }
1451
1452 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1453         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1454 {
1455     struct wined3d_device *device = src_surface->resource.device;
1456
1457     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1458             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1459             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1460         return WINED3DERR_INVALIDCALL;
1461
1462     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1463
1464     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1465             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1466     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1467
1468     return WINED3D_OK;
1469 }
1470
1471 /* Do not call while under the GL lock. */
1472 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1473         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1474         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1475 {
1476     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1477     struct wined3d_device *device = dst_surface->resource.device;
1478     DWORD src_ds_flags, dst_ds_flags;
1479     RECT src_rect, dst_rect;
1480     BOOL scale, convert;
1481
1482     static const DWORD simple_blit = WINEDDBLT_ASYNC
1483             | WINEDDBLT_COLORFILL
1484             | WINEDDBLT_WAIT
1485             | WINEDDBLT_DEPTHFILL
1486             | WINEDDBLT_DONOTWAIT;
1487
1488     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1489             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1490             flags, fx, debug_d3dtexturefiltertype(filter));
1491     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1492
1493     if (fx)
1494     {
1495         TRACE("dwSize %#x.\n", fx->dwSize);
1496         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1497         TRACE("dwROP %#x.\n", fx->dwROP);
1498         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1499         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1500         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1501         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1502         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1503         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1504         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1505         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1506         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1507         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1508         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1509         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1510         TRACE("dwReserved %#x.\n", fx->dwReserved);
1511         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1512         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1513         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1514         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1515         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1516         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1517                 fx->ddckDestColorkey.dwColorSpaceLowValue,
1518                 fx->ddckDestColorkey.dwColorSpaceHighValue);
1519         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1520                 fx->ddckSrcColorkey.dwColorSpaceLowValue,
1521                 fx->ddckSrcColorkey.dwColorSpaceHighValue);
1522     }
1523
1524     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1525     {
1526         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1527         return WINEDDERR_SURFACEBUSY;
1528     }
1529
1530     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1531
1532     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1533             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1534             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1535             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1536             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1537     {
1538         /* The destination rect can be out of bounds on the condition
1539          * that a clipper is set for the surface. */
1540         if (dst_surface->clipper)
1541             FIXME("Blit clipping not implemented.\n");
1542         else
1543             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1544         return WINEDDERR_INVALIDRECT;
1545     }
1546
1547     if (src_surface)
1548     {
1549         surface_get_rect(src_surface, src_rect_in, &src_rect);
1550
1551         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1552                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1553                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1554                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1555                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1556         {
1557             WARN("Application gave us bad source rectangle for Blt.\n");
1558             return WINEDDERR_INVALIDRECT;
1559         }
1560     }
1561     else
1562     {
1563         memset(&src_rect, 0, sizeof(src_rect));
1564     }
1565
1566     if (!fx || !(fx->dwDDFX))
1567         flags &= ~WINEDDBLT_DDFX;
1568
1569     if (flags & WINEDDBLT_WAIT)
1570         flags &= ~WINEDDBLT_WAIT;
1571
1572     if (flags & WINEDDBLT_ASYNC)
1573     {
1574         static unsigned int once;
1575
1576         if (!once++)
1577             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1578         flags &= ~WINEDDBLT_ASYNC;
1579     }
1580
1581     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1582     if (flags & WINEDDBLT_DONOTWAIT)
1583     {
1584         static unsigned int once;
1585
1586         if (!once++)
1587             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1588         flags &= ~WINEDDBLT_DONOTWAIT;
1589     }
1590
1591     if (!device->d3d_initialized)
1592     {
1593         WARN("D3D not initialized, using fallback.\n");
1594         goto cpu;
1595     }
1596
1597     /* We want to avoid invalidating the sysmem location for converted
1598      * surfaces, since otherwise we'd have to convert the data back when
1599      * locking them. */
1600     if (dst_surface->flags & SFLAG_CONVERTED)
1601     {
1602         WARN("Converted surface, using CPU blit.\n");
1603         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1604     }
1605
1606     if (flags & ~simple_blit)
1607     {
1608         WARN("Using fallback for complex blit (%#x).\n", flags);
1609         goto fallback;
1610     }
1611
1612     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1613         src_swapchain = src_surface->container.u.swapchain;
1614     else
1615         src_swapchain = NULL;
1616
1617     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1618         dst_swapchain = dst_surface->container.u.swapchain;
1619     else
1620         dst_swapchain = NULL;
1621
1622     /* This isn't strictly needed. FBO blits for example could deal with
1623      * cross-swapchain blits by first downloading the source to a texture
1624      * before switching to the destination context. We just have this here to
1625      * not have to deal with the issue, since cross-swapchain blits should be
1626      * rare. */
1627     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1628     {
1629         FIXME("Using fallback for cross-swapchain blit.\n");
1630         goto fallback;
1631     }
1632
1633     scale = src_surface
1634             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1635             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1636     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1637
1638     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1639     if (src_surface)
1640         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1641     else
1642         src_ds_flags = 0;
1643
1644     if (src_ds_flags || dst_ds_flags)
1645     {
1646         if (flags & WINEDDBLT_DEPTHFILL)
1647         {
1648             float depth;
1649
1650             TRACE("Depth fill.\n");
1651
1652             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1653                 return WINED3DERR_INVALIDCALL;
1654
1655             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1656                 return WINED3D_OK;
1657         }
1658         else
1659         {
1660             /* Accessing depth / stencil surfaces is supposed to fail while in
1661              * a scene, except for fills, which seem to work. */
1662             if (device->inScene)
1663             {
1664                 WARN("Rejecting depth / stencil access while in scene.\n");
1665                 return WINED3DERR_INVALIDCALL;
1666             }
1667
1668             if (src_ds_flags != dst_ds_flags)
1669             {
1670                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1671                 return WINED3DERR_INVALIDCALL;
1672             }
1673
1674             if (src_rect.top || src_rect.left
1675                     || src_rect.bottom != src_surface->resource.height
1676                     || src_rect.right != src_surface->resource.width)
1677             {
1678                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1679                         wine_dbgstr_rect(&src_rect));
1680                 return WINED3DERR_INVALIDCALL;
1681             }
1682
1683             if (dst_rect.top || dst_rect.left
1684                     || dst_rect.bottom != dst_surface->resource.height
1685                     || dst_rect.right != dst_surface->resource.width)
1686             {
1687                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1688                         wine_dbgstr_rect(&src_rect));
1689                 return WINED3DERR_INVALIDCALL;
1690             }
1691
1692             if (scale)
1693             {
1694                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1695                 return WINED3DERR_INVALIDCALL;
1696             }
1697
1698             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1699                 return WINED3D_OK;
1700         }
1701     }
1702     else
1703     {
1704         /* In principle this would apply to depth blits as well, but we don't
1705          * implement those in the CPU blitter at the moment. */
1706         if ((dst_surface->flags & SFLAG_INSYSMEM)
1707                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1708         {
1709             if (scale)
1710                 TRACE("Not doing sysmem blit because of scaling.\n");
1711             else if (convert)
1712                 TRACE("Not doing sysmem blit because of format conversion.\n");
1713             else
1714                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1715         }
1716
1717         if (flags & WINEDDBLT_COLORFILL)
1718         {
1719             WINED3DCOLORVALUE color;
1720
1721             TRACE("Color fill.\n");
1722
1723             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1724                 goto fallback;
1725
1726             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1727                 return WINED3D_OK;
1728         }
1729         else
1730         {
1731             TRACE("Color blit.\n");
1732
1733             /* Use present for back -> front blits. The idea behind this is
1734              * that present is potentially faster than a blit, in particular
1735              * when FBO blits aren't available. Some ddraw applications like
1736              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1737              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1738              * applications can't blit directly to the frontbuffer. */
1739             if (dst_swapchain && dst_swapchain->back_buffers
1740                     && dst_surface == dst_swapchain->front_buffer
1741                     && src_surface == dst_swapchain->back_buffers[0])
1742             {
1743                 WINED3DSWAPEFFECT swap_effect = dst_swapchain->presentParms.SwapEffect;
1744
1745                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1746
1747                 /* Set the swap effect to COPY, we don't want the backbuffer
1748                  * to become undefined. */
1749                 dst_swapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
1750                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1751                 dst_swapchain->presentParms.SwapEffect = swap_effect;
1752
1753                 return WINED3D_OK;
1754             }
1755
1756             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1757                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1758                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1759             {
1760                 TRACE("Using FBO blit.\n");
1761
1762                 surface_blt_fbo(device, filter,
1763                         src_surface, src_surface->draw_binding, &src_rect,
1764                         dst_surface, dst_surface->draw_binding, &dst_rect);
1765                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1766                 return WINED3D_OK;
1767             }
1768
1769             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1770                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1771                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1772             {
1773                 TRACE("Using arbfp blit.\n");
1774
1775                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1776                     return WINED3D_OK;
1777             }
1778         }
1779     }
1780
1781 fallback:
1782
1783     /* Special cases for render targets. */
1784     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1785             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1786     {
1787         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1788                 src_surface, &src_rect, flags, fx, filter)))
1789             return WINED3D_OK;
1790     }
1791
1792 cpu:
1793
1794     /* For the rest call the X11 surface implementation. For render targets
1795      * this should be implemented OpenGL accelerated in BltOverride, other
1796      * blits are rather rare. */
1797     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1798 }
1799
1800 /* Do not call while under the GL lock. */
1801 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1802         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1803 {
1804     RECT src_rect, dst_rect;
1805     DWORD flags = 0;
1806
1807     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect_in %s, trans %#x.\n",
1808             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1809
1810     surface_get_rect(src_surface, src_rect_in, &src_rect);
1811
1812     dst_rect.left = dst_x;
1813     dst_rect.top = dst_y;
1814     dst_rect.right = dst_x + src_rect.right - src_rect.left;
1815     dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1816
1817     if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1818         flags |= WINEDDBLT_KEYSRC;
1819     if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1820         flags |= WINEDDBLT_KEYDEST;
1821     if (trans & WINEDDBLTFAST_WAIT)
1822         flags |= WINEDDBLT_WAIT;
1823     if (trans & WINEDDBLTFAST_DONOTWAIT)
1824         flags |= WINEDDBLT_DONOTWAIT;
1825
1826     return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT);
1827 }
1828
1829 /* Context activation is done by the caller. */
1830 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1831 {
1832     if (!surface->resource.heapMemory)
1833     {
1834         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1835         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1836                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1837     }
1838
1839     ENTER_GL();
1840     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1841     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1842     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1843             surface->resource.size, surface->resource.allocatedMemory));
1844     checkGLcall("glGetBufferSubDataARB");
1845     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1846     checkGLcall("glDeleteBuffersARB");
1847     LEAVE_GL();
1848
1849     surface->pbo = 0;
1850     surface->flags &= ~SFLAG_PBO;
1851 }
1852
1853 /* Do not call while under the GL lock. */
1854 static void surface_unload(struct wined3d_resource *resource)
1855 {
1856     struct wined3d_surface *surface = surface_from_resource(resource);
1857     struct wined3d_renderbuffer_entry *entry, *entry2;
1858     struct wined3d_device *device = resource->device;
1859     const struct wined3d_gl_info *gl_info;
1860     struct wined3d_context *context;
1861
1862     TRACE("surface %p.\n", surface);
1863
1864     if (resource->pool == WINED3DPOOL_DEFAULT)
1865     {
1866         /* Default pool resources are supposed to be destroyed before Reset is called.
1867          * Implicit resources stay however. So this means we have an implicit render target
1868          * or depth stencil. The content may be destroyed, but we still have to tear down
1869          * opengl resources, so we cannot leave early.
1870          *
1871          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1872          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1873          * or the depth stencil into an FBO the texture or render buffer will be removed
1874          * and all flags get lost
1875          */
1876         surface_init_sysmem(surface);
1877         /* We also get here when the ddraw swapchain is destroyed, for example
1878          * for a mode switch. In this case this surface won't necessarily be
1879          * an implicit surface. We have to mark it lost so that the
1880          * application can restore it after the mode switch. */
1881         surface->flags |= SFLAG_LOST;
1882     }
1883     else
1884     {
1885         /* Load the surface into system memory */
1886         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1887         surface_modify_location(surface, surface->draw_binding, FALSE);
1888     }
1889     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1890     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1891     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1892
1893     context = context_acquire(device, NULL);
1894     gl_info = context->gl_info;
1895
1896     /* Destroy PBOs, but load them into real sysmem before */
1897     if (surface->flags & SFLAG_PBO)
1898         surface_remove_pbo(surface, gl_info);
1899
1900     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1901      * all application-created targets the application has to release the surface
1902      * before calling _Reset
1903      */
1904     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1905     {
1906         ENTER_GL();
1907         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1908         LEAVE_GL();
1909         list_remove(&entry->entry);
1910         HeapFree(GetProcessHeap(), 0, entry);
1911     }
1912     list_init(&surface->renderbuffers);
1913     surface->current_renderbuffer = NULL;
1914
1915     ENTER_GL();
1916
1917     /* If we're in a texture, the texture name belongs to the texture.
1918      * Otherwise, destroy it. */
1919     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1920     {
1921         glDeleteTextures(1, &surface->texture_name);
1922         surface->texture_name = 0;
1923         glDeleteTextures(1, &surface->texture_name_srgb);
1924         surface->texture_name_srgb = 0;
1925     }
1926     if (surface->rb_multisample)
1927     {
1928         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1929         surface->rb_multisample = 0;
1930     }
1931     if (surface->rb_resolved)
1932     {
1933         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1934         surface->rb_resolved = 0;
1935     }
1936
1937     LEAVE_GL();
1938
1939     context_release(context);
1940
1941     resource_unload(resource);
1942 }
1943
1944 static const struct wined3d_resource_ops surface_resource_ops =
1945 {
1946     surface_unload,
1947 };
1948
1949 static const struct wined3d_surface_ops surface_ops =
1950 {
1951     surface_private_setup,
1952     surface_cleanup,
1953     surface_realize_palette,
1954     surface_draw_overlay,
1955     surface_preload,
1956     surface_map,
1957     surface_unmap,
1958     surface_getdc,
1959 };
1960
1961 /*****************************************************************************
1962  * Initializes the GDI surface, aka creates the DIB section we render to
1963  * The DIB section creation is done by calling GetDC, which will create the
1964  * section and releasing the dc to allow the app to use it. The dib section
1965  * will stay until the surface is released
1966  *
1967  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1968  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1969  * avoid confusion in the shared surface code.
1970  *
1971  * Returns:
1972  *  WINED3D_OK on success
1973  *  The return values of called methods on failure
1974  *
1975  *****************************************************************************/
1976 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1977 {
1978     HRESULT hr;
1979
1980     TRACE("surface %p.\n", surface);
1981
1982     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1983     {
1984         ERR("Overlays not yet supported by GDI surfaces.\n");
1985         return WINED3DERR_INVALIDCALL;
1986     }
1987
1988     /* Sysmem textures have memory already allocated - release it,
1989      * this avoids an unnecessary memcpy. */
1990     hr = surface_create_dib_section(surface);
1991     if (SUCCEEDED(hr))
1992     {
1993         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1994         surface->resource.heapMemory = NULL;
1995         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1996     }
1997
1998     /* We don't mind the nonpow2 stuff in GDI. */
1999     surface->pow2Width = surface->resource.width;
2000     surface->pow2Height = surface->resource.height;
2001
2002     return WINED3D_OK;
2003 }
2004
2005 static void surface_gdi_cleanup(struct wined3d_surface *surface)
2006 {
2007     struct wined3d_surface *overlay, *cur;
2008
2009     TRACE("surface %p.\n", surface);
2010
2011     if (surface->flags & SFLAG_DIBSECTION)
2012     {
2013         /* Release the DC. */
2014         SelectObject(surface->hDC, surface->dib.holdbitmap);
2015         DeleteDC(surface->hDC);
2016         /* Release the DIB section. */
2017         DeleteObject(surface->dib.DIBsection);
2018         surface->dib.bitmap_data = NULL;
2019         surface->resource.allocatedMemory = NULL;
2020     }
2021
2022     if (surface->flags & SFLAG_USERPTR)
2023         wined3d_surface_set_mem(surface, NULL);
2024     if (surface->overlay_dest)
2025         list_remove(&surface->overlay_entry);
2026
2027     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
2028     {
2029         list_remove(&overlay->overlay_entry);
2030         overlay->overlay_dest = NULL;
2031     }
2032
2033     HeapFree(GetProcessHeap(), 0, surface->palette9);
2034
2035     resource_cleanup(&surface->resource);
2036 }
2037
2038 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
2039 {
2040     struct wined3d_palette *palette = surface->palette;
2041
2042     TRACE("surface %p.\n", surface);
2043
2044     if (!palette) return;
2045
2046     if (surface->flags & SFLAG_DIBSECTION)
2047     {
2048         RGBQUAD col[256];
2049         unsigned int i;
2050
2051         TRACE("Updating the DC's palette.\n");
2052
2053         for (i = 0; i < 256; ++i)
2054         {
2055             col[i].rgbRed = palette->palents[i].peRed;
2056             col[i].rgbGreen = palette->palents[i].peGreen;
2057             col[i].rgbBlue = palette->palents[i].peBlue;
2058             col[i].rgbReserved = 0;
2059         }
2060         SetDIBColorTable(surface->hDC, 0, 256, col);
2061     }
2062
2063     /* Update the image because of the palette change. Some games like e.g.
2064      * Red Alert call SetEntries a lot to implement fading. */
2065     /* Tell the swapchain to update the screen. */
2066     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2067     {
2068         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2069         if (surface == swapchain->front_buffer)
2070         {
2071             x11_copy_to_screen(swapchain, NULL);
2072         }
2073     }
2074 }
2075
2076 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
2077 {
2078     FIXME("GDI surfaces can't draw overlays yet.\n");
2079     return E_FAIL;
2080 }
2081
/* Preloading is not supported for GDI surfaces; this only logs an error. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);

    ERR("Preloading GDI surfaces is not supported.\n");
}
2088
2089 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2090 {
2091     TRACE("surface %p, rect %s, flags %#x.\n",
2092             surface, wine_dbgstr_rect(rect), flags);
2093
2094     if (!surface->resource.allocatedMemory)
2095     {
2096         /* This happens on gdi surfaces if the application set a user pointer
2097          * and resets it. Recreate the DIB section. */
2098         surface_create_dib_section(surface);
2099         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2100     }
2101 }
2102
2103 static void gdi_surface_unmap(struct wined3d_surface *surface)
2104 {
2105     TRACE("surface %p.\n", surface);
2106
2107     /* Tell the swapchain to update the screen. */
2108     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2109     {
2110         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2111         if (surface == swapchain->front_buffer)
2112         {
2113             x11_copy_to_screen(swapchain, &surface->lockedRect);
2114         }
2115     }
2116
2117     memset(&surface->lockedRect, 0, sizeof(RECT));
2118 }
2119
2120 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
2121 {
2122     WINED3DLOCKED_RECT lock;
2123     HRESULT hr;
2124
2125     TRACE("surface %p.\n", surface);
2126
2127     /* Should have a DIB section already. */
2128     if (!(surface->flags & SFLAG_DIBSECTION))
2129     {
2130         WARN("DC not supported on this surface\n");
2131         return WINED3DERR_INVALIDCALL;
2132     }
2133
2134     /* Map the surface. */
2135     hr = wined3d_surface_map(surface, &lock, NULL, 0);
2136     if (FAILED(hr))
2137         ERR("Map failed, hr %#x.\n", hr);
2138
2139     return hr;
2140 }
2141
2142 static const struct wined3d_surface_ops gdi_surface_ops =
2143 {
2144     gdi_surface_private_setup,
2145     surface_gdi_cleanup,
2146     gdi_surface_realize_palette,
2147     gdi_surface_draw_overlay,
2148     gdi_surface_preload,
2149     gdi_surface_map,
2150     gdi_surface_unmap,
2151     gdi_surface_getdc,
2152 };
2153
2154 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2155 {
2156     GLuint *name;
2157     DWORD flag;
2158
2159     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2160
2161     if(srgb)
2162     {
2163         name = &surface->texture_name_srgb;
2164         flag = SFLAG_INSRGBTEX;
2165     }
2166     else
2167     {
2168         name = &surface->texture_name;
2169         flag = SFLAG_INTEXTURE;
2170     }
2171
2172     if (!*name && new_name)
2173     {
2174         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2175          * surface has no texture name yet. See if we can get rid of this. */
2176         if (surface->flags & flag)
2177             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2178         surface_modify_location(surface, flag, FALSE);
2179     }
2180
2181     *name = new_name;
2182     surface_force_reload(surface);
2183 }
2184
2185 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2186 {
2187     TRACE("surface %p, target %#x.\n", surface, target);
2188
2189     if (surface->texture_target != target)
2190     {
2191         if (target == GL_TEXTURE_RECTANGLE_ARB)
2192         {
2193             surface->flags &= ~SFLAG_NORMCOORD;
2194         }
2195         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2196         {
2197             surface->flags |= SFLAG_NORMCOORD;
2198         }
2199     }
2200     surface->texture_target = target;
2201     surface_force_reload(surface);
2202 }
2203
2204 /* Context activation is done by the caller. */
2205 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2206 {
2207     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2208
2209     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2210     {
2211         struct wined3d_texture *texture = surface->container.u.texture;
2212
2213         TRACE("Passing to container (%p).\n", texture);
2214         texture->texture_ops->texture_bind(texture, context, srgb);
2215     }
2216     else
2217     {
2218         if (surface->texture_level)
2219         {
2220             ERR("Standalone surface %p is non-zero texture level %u.\n",
2221                     surface, surface->texture_level);
2222         }
2223
2224         if (srgb)
2225             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2226
2227         ENTER_GL();
2228
2229         if (!surface->texture_name)
2230         {
2231             glGenTextures(1, &surface->texture_name);
2232             checkGLcall("glGenTextures");
2233
2234             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2235
2236             context_bind_texture(context, surface->texture_target, surface->texture_name);
2237             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2238             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2239             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2240             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2241             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2242             checkGLcall("glTexParameteri");
2243         }
2244         else
2245         {
2246             context_bind_texture(context, surface->texture_target, surface->texture_name);
2247         }
2248
2249         LEAVE_GL();
2250     }
2251 }
2252
2253 /* This call just downloads data, the caller is responsible for binding the
2254  * correct texture. */
2255 /* Context activation is done by the caller. */
2256 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2257 {
2258     const struct wined3d_format *format = surface->resource.format;
2259
2260     /* Only support read back of converted P8 surfaces. */
2261     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2262     {
2263         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2264         return;
2265     }
2266
2267     ENTER_GL();
2268
2269     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2270     {
2271         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2272                 surface, surface->texture_level, format->glFormat, format->glType,
2273                 surface->resource.allocatedMemory);
2274
2275         if (surface->flags & SFLAG_PBO)
2276         {
2277             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2278             checkGLcall("glBindBufferARB");
2279             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2280             checkGLcall("glGetCompressedTexImageARB");
2281             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2282             checkGLcall("glBindBufferARB");
2283         }
2284         else
2285         {
2286             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2287                     surface->texture_level, surface->resource.allocatedMemory));
2288             checkGLcall("glGetCompressedTexImageARB");
2289         }
2290
2291         LEAVE_GL();
2292     }
2293     else
2294     {
2295         void *mem;
2296         GLenum gl_format = format->glFormat;
2297         GLenum gl_type = format->glType;
2298         int src_pitch = 0;
2299         int dst_pitch = 0;
2300
2301         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2302         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2303         {
2304             gl_format = GL_ALPHA;
2305             gl_type = GL_UNSIGNED_BYTE;
2306         }
2307
2308         if (surface->flags & SFLAG_NONPOW2)
2309         {
2310             unsigned char alignment = surface->resource.device->surface_alignment;
2311             src_pitch = format->byte_count * surface->pow2Width;
2312             dst_pitch = wined3d_surface_get_pitch(surface);
2313             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2314             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2315         }
2316         else
2317         {
2318             mem = surface->resource.allocatedMemory;
2319         }
2320
2321         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2322                 surface, surface->texture_level, gl_format, gl_type, mem);
2323
2324         if (surface->flags & SFLAG_PBO)
2325         {
2326             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2327             checkGLcall("glBindBufferARB");
2328
2329             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2330             checkGLcall("glGetTexImage");
2331
2332             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2333             checkGLcall("glBindBufferARB");
2334         }
2335         else
2336         {
2337             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2338             checkGLcall("glGetTexImage");
2339         }
2340         LEAVE_GL();
2341
2342         if (surface->flags & SFLAG_NONPOW2)
2343         {
2344             const BYTE *src_data;
2345             BYTE *dst_data;
2346             UINT y;
2347             /*
2348              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2349              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2350              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2351              *
2352              * We're doing this...
2353              *
2354              * instead of boxing the texture :
2355              * |<-texture width ->|  -->pow2width|   /\
2356              * |111111111111111111|              |   |
2357              * |222 Texture 222222| boxed empty  | texture height
2358              * |3333 Data 33333333|              |   |
2359              * |444444444444444444|              |   \/
2360              * -----------------------------------   |
2361              * |     boxed  empty | boxed empty  | pow2height
2362              * |                  |              |   \/
2363              * -----------------------------------
2364              *
2365              *
2366              * we're repacking the data to the expected texture width
2367              *
2368              * |<-texture width ->|  -->pow2width|   /\
2369              * |111111111111111111222222222222222|   |
2370              * |222333333333333333333444444444444| texture height
2371              * |444444                           |   |
2372              * |                                 |   \/
2373              * |                                 |   |
2374              * |            empty                | pow2height
2375              * |                                 |   \/
2376              * -----------------------------------
2377              *
2378              * == is the same as
2379              *
2380              * |<-texture width ->|    /\
2381              * |111111111111111111|
2382              * |222222222222222222|texture height
2383              * |333333333333333333|
2384              * |444444444444444444|    \/
2385              * --------------------
2386              *
2387              * this also means that any references to allocatedMemory should work with the data as if were a
2388              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2389              *
2390              * internally the texture is still stored in a boxed format so any references to textureName will
2391              * get a boxed texture with width pow2width and not a texture of width resource.width.
2392              *
2393              * Performance should not be an issue, because applications normally do not lock the surfaces when
2394              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2395              * and doesn't have to be re-read. */
2396             src_data = mem;
2397             dst_data = surface->resource.allocatedMemory;
2398             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2399             for (y = 1; y < surface->resource.height; ++y)
2400             {
2401                 /* skip the first row */
2402                 src_data += src_pitch;
2403                 dst_data += dst_pitch;
2404                 memcpy(dst_data, src_data, dst_pitch);
2405             }
2406
2407             HeapFree(GetProcessHeap(), 0, mem);
2408         }
2409     }
2410
2411     /* Surface has now been downloaded */
2412     surface->flags |= SFLAG_INSYSMEM;
2413 }
2414
2415 /* This call just uploads data, the caller is responsible for binding the
2416  * correct texture. */
2417 /* Context activation is done by the caller. */
2418 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2419         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2420         BOOL srgb, const struct wined3d_bo_address *data)
2421 {
2422     UINT update_w = src_rect->right - src_rect->left;
2423     UINT update_h = src_rect->bottom - src_rect->top;
2424
2425     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2426             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2427             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2428
2429     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2430         update_h *= format->heightscale;
2431
2432     ENTER_GL();
2433
2434     if (data->buffer_object)
2435     {
2436         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2437         checkGLcall("glBindBufferARB");
2438     }
2439
2440     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2441     {
2442         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2443         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2444         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2445         const BYTE *addr = data->addr;
2446         GLenum internal;
2447
2448         addr += (src_rect->top / format->block_height) * src_pitch;
2449         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2450
2451         if (srgb)
2452             internal = format->glGammaInternal;
2453         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2454             internal = format->rtInternal;
2455         else
2456             internal = format->glInternal;
2457
2458         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2459                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2460                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2461
2462         if (row_length == src_pitch)
2463         {
2464             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2465                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2466         }
2467         else
2468         {
2469             UINT row, y;
2470
2471             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2472              * can't use the unpack row length like below. */
2473             for (row = 0, y = dst_point->y; row < row_count; ++row)
2474             {
2475                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2476                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2477                 y += format->block_height;
2478                 addr += src_pitch;
2479             }
2480         }
2481         checkGLcall("glCompressedTexSubImage2DARB");
2482     }
2483     else
2484     {
2485         const BYTE *addr = data->addr;
2486
2487         addr += src_rect->top * src_w * format->byte_count;
2488         addr += src_rect->left * format->byte_count;
2489
2490         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2491                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2492                 update_w, update_h, format->glFormat, format->glType, addr);
2493
2494         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2495         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2496                 update_w, update_h, format->glFormat, format->glType, addr);
2497         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2498         checkGLcall("glTexSubImage2D");
2499     }
2500
2501     if (data->buffer_object)
2502     {
2503         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2504         checkGLcall("glBindBufferARB");
2505     }
2506
2507     LEAVE_GL();
2508
2509     if (wined3d_settings.strict_draw_ordering)
2510         wglFlush();
2511
2512     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2513     {
2514         struct wined3d_device *device = surface->resource.device;
2515         unsigned int i;
2516
2517         for (i = 0; i < device->context_count; ++i)
2518         {
2519             context_surface_update(device->contexts[i], surface);
2520         }
2521     }
2522 }
2523
2524 /* This call just allocates the texture, the caller is responsible for binding
2525  * the correct texture. */
2526 /* Context activation is done by the caller. */
2527 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2528         const struct wined3d_format *format, BOOL srgb)
2529 {
2530     BOOL enable_client_storage = FALSE;
2531     GLsizei width = surface->pow2Width;
2532     GLsizei height = surface->pow2Height;
2533     const BYTE *mem = NULL;
2534     GLenum internal;
2535
2536     if (srgb)
2537     {
2538         internal = format->glGammaInternal;
2539     }
2540     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2541     {
2542         internal = format->rtInternal;
2543     }
2544     else
2545     {
2546         internal = format->glInternal;
2547     }
2548
2549     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2550
2551     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2552             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2553             internal, width, height, format->glFormat, format->glType);
2554
2555     ENTER_GL();
2556
2557     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2558     {
2559         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2560                 || !surface->resource.allocatedMemory)
2561         {
2562             /* In some cases we want to disable client storage.
2563              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2564              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2565              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2566              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2567              */
2568             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2569             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2570             surface->flags &= ~SFLAG_CLIENT;
2571             enable_client_storage = TRUE;
2572         }
2573         else
2574         {
2575             surface->flags |= SFLAG_CLIENT;
2576
2577             /* Point OpenGL to our allocated texture memory. Do not use
2578              * resource.allocatedMemory here because it might point into a
2579              * PBO. Instead use heapMemory, but get the alignment right. */
2580             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2581                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2582         }
2583     }
2584
2585     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2586     {
2587         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2588                 internal, width, height, 0, surface->resource.size, mem));
2589         checkGLcall("glCompressedTexImage2DARB");
2590     }
2591     else
2592     {
2593         glTexImage2D(surface->texture_target, surface->texture_level,
2594                 internal, width, height, 0, format->glFormat, format->glType, mem);
2595         checkGLcall("glTexImage2D");
2596     }
2597
2598     if(enable_client_storage) {
2599         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2600         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2601     }
2602     LEAVE_GL();
2603 }
2604
2605 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2606  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2607 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2608 /* GL locking is done by the caller */
2609 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2610 {
2611     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2612     struct wined3d_renderbuffer_entry *entry;
2613     GLuint renderbuffer = 0;
2614     unsigned int src_width, src_height;
2615     unsigned int width, height;
2616
2617     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2618     {
2619         width = rt->pow2Width;
2620         height = rt->pow2Height;
2621     }
2622     else
2623     {
2624         width = surface->pow2Width;
2625         height = surface->pow2Height;
2626     }
2627
2628     src_width = surface->pow2Width;
2629     src_height = surface->pow2Height;
2630
2631     /* A depth stencil smaller than the render target is not valid */
2632     if (width > src_width || height > src_height) return;
2633
2634     /* Remove any renderbuffer set if the sizes match */
2635     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2636             || (width == src_width && height == src_height))
2637     {
2638         surface->current_renderbuffer = NULL;
2639         return;
2640     }
2641
2642     /* Look if we've already got a renderbuffer of the correct dimensions */
2643     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2644     {
2645         if (entry->width == width && entry->height == height)
2646         {
2647             renderbuffer = entry->id;
2648             surface->current_renderbuffer = entry;
2649             break;
2650         }
2651     }
2652
2653     if (!renderbuffer)
2654     {
2655         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2656         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2657         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2658                 surface->resource.format->glInternal, width, height);
2659
2660         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2661         entry->width = width;
2662         entry->height = height;
2663         entry->id = renderbuffer;
2664         list_add_head(&surface->renderbuffers, &entry->entry);
2665
2666         surface->current_renderbuffer = entry;
2667     }
2668
2669     checkGLcall("set_compatible_renderbuffer");
2670 }
2671
2672 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2673 {
2674     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2675
2676     TRACE("surface %p.\n", surface);
2677
2678     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2679     {
2680         ERR("Surface %p is not on a swapchain.\n", surface);
2681         return GL_NONE;
2682     }
2683
2684     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2685     {
2686         if (swapchain->render_to_fbo)
2687         {
2688             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2689             return GL_COLOR_ATTACHMENT0;
2690         }
2691         TRACE("Returning GL_BACK\n");
2692         return GL_BACK;
2693     }
2694     else if (surface == swapchain->front_buffer)
2695     {
2696         TRACE("Returning GL_FRONT\n");
2697         return GL_FRONT;
2698     }
2699
2700     FIXME("Higher back buffer, returning GL_BACK\n");
2701     return GL_BACK;
2702 }
2703
2704 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2705 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2706 {
2707     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2708
2709     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2710         /* No partial locking for textures yet. */
2711         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2712
2713     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2714     if (dirty_rect)
2715     {
2716         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2717         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2718         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2719         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2720     }
2721     else
2722     {
2723         surface->dirtyRect.left = 0;
2724         surface->dirtyRect.top = 0;
2725         surface->dirtyRect.right = surface->resource.width;
2726         surface->dirtyRect.bottom = surface->resource.height;
2727     }
2728
2729     /* if the container is a texture then mark it dirty. */
2730     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2731     {
2732         TRACE("Passing to container.\n");
2733         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2734     }
2735 }
2736
2737 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2738 {
2739     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2740     BOOL ck_changed;
2741
2742     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2743
2744     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2745     {
2746         ERR("Not supported on scratch surfaces.\n");
2747         return WINED3DERR_INVALIDCALL;
2748     }
2749
2750     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2751
2752     /* Reload if either the texture and sysmem have different ideas about the
2753      * color key, or the actual key values changed. */
2754     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2755             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2756             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2757     {
2758         TRACE("Reloading because of color keying\n");
2759         /* To perform the color key conversion we need a sysmem copy of
2760          * the surface. Make sure we have it. */
2761
2762         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2763         /* Make sure the texture is reloaded because of the color key change,
2764          * this kills performance though :( */
2765         /* TODO: This is not necessarily needed with hw palettized texture support. */
2766         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2767         /* Switching color keying on / off may change the internal format. */
2768         if (ck_changed)
2769             surface_force_reload(surface);
2770     }
2771     else if (!(surface->flags & flag))
2772     {
2773         TRACE("Reloading because surface is dirty.\n");
2774     }
2775     else
2776     {
2777         TRACE("surface is already in texture\n");
2778         return WINED3D_OK;
2779     }
2780
2781     /* No partial locking for textures yet. */
2782     surface_load_location(surface, flag, NULL);
2783     surface_evict_sysmem(surface);
2784
2785     return WINED3D_OK;
2786 }
2787
/* Converts a 32 bit float to a 16 bit half float (1 sign bit, 5 exponent
 * bits with excess 15, 10 mantissa bits).
 *
 * Zero is returned as 0x0000, NaN as 0x7c01 and infinities as 0x7c00 /
 * 0xfc00. Values too large for a half float become infinity, values too
 * small become denormals or zero. Normalized results are rounded to nearest,
 * away from zero on ties; denormals are truncated.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The scaled value is kept in [2^10, 2^11), so that its integer part
     * is the 11 bit mantissa including the implicit leading one. */
    const float mantissa_min = 1024.0f;
    const float mantissa_max = 2048.0f;
    unsigned int mantissa;
    unsigned short ret;
    int exp = 0;
    float tmp;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    tmp = *in < 0.0f ? -*in : *in;

    while (tmp < mantissa_min)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= mantissa_max)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding may carry out of the 11 mantissa bits (2047.5 -> 2048).
     * Renormalize, otherwise masking with 0x3ff below would drop the carry
     * and halve the result. */
    if (mantissa == 0x800)
    {
        mantissa >>= 1;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2850
2851 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2852 {
2853     ULONG refcount;
2854
2855     TRACE("Surface %p, container %p of type %#x.\n",
2856             surface, surface->container.u.base, surface->container.type);
2857
2858     switch (surface->container.type)
2859     {
2860         case WINED3D_CONTAINER_TEXTURE:
2861             return wined3d_texture_incref(surface->container.u.texture);
2862
2863         case WINED3D_CONTAINER_SWAPCHAIN:
2864             return wined3d_swapchain_incref(surface->container.u.swapchain);
2865
2866         default:
2867             ERR("Unhandled container type %#x.\n", surface->container.type);
2868         case WINED3D_CONTAINER_NONE:
2869             break;
2870     }
2871
2872     refcount = InterlockedIncrement(&surface->resource.ref);
2873     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2874
2875     return refcount;
2876 }
2877
2878 /* Do not call while under the GL lock. */
2879 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2880 {
2881     ULONG refcount;
2882
2883     TRACE("Surface %p, container %p of type %#x.\n",
2884             surface, surface->container.u.base, surface->container.type);
2885
2886     switch (surface->container.type)
2887     {
2888         case WINED3D_CONTAINER_TEXTURE:
2889             return wined3d_texture_decref(surface->container.u.texture);
2890
2891         case WINED3D_CONTAINER_SWAPCHAIN:
2892             return wined3d_swapchain_decref(surface->container.u.swapchain);
2893
2894         default:
2895             ERR("Unhandled container type %#x.\n", surface->container.type);
2896         case WINED3D_CONTAINER_NONE:
2897             break;
2898     }
2899
2900     refcount = InterlockedDecrement(&surface->resource.ref);
2901     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2902
2903     if (!refcount)
2904     {
2905         surface->surface_ops->surface_cleanup(surface);
2906         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2907
2908         TRACE("Destroyed surface %p.\n", surface);
2909         HeapFree(GetProcessHeap(), 0, surface);
2910     }
2911
2912     return refcount;
2913 }
2914
2915 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2916 {
2917     return resource_set_priority(&surface->resource, priority);
2918 }
2919
2920 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2921 {
2922     return resource_get_priority(&surface->resource);
2923 }
2924
2925 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2926 {
2927     TRACE("surface %p.\n", surface);
2928
2929     surface->surface_ops->surface_preload(surface);
2930 }
2931
2932 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2933 {
2934     TRACE("surface %p.\n", surface);
2935
2936     return surface->resource.parent;
2937 }
2938
2939 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2940 {
2941     TRACE("surface %p.\n", surface);
2942
2943     return &surface->resource;
2944 }
2945
2946 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2947 {
2948     TRACE("surface %p, flags %#x.\n", surface, flags);
2949
2950     switch (flags)
2951     {
2952         case WINEDDGBS_CANBLT:
2953         case WINEDDGBS_ISBLTDONE:
2954             return WINED3D_OK;
2955
2956         default:
2957             return WINED3DERR_INVALIDCALL;
2958     }
2959 }
2960
2961 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2962 {
2963     TRACE("surface %p, flags %#x.\n", surface, flags);
2964
2965     /* XXX: DDERR_INVALIDSURFACETYPE */
2966
2967     switch (flags)
2968     {
2969         case WINEDDGFS_CANFLIP:
2970         case WINEDDGFS_ISFLIPDONE:
2971             return WINED3D_OK;
2972
2973         default:
2974             return WINED3DERR_INVALIDCALL;
2975     }
2976 }
2977
2978 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2979 {
2980     TRACE("surface %p.\n", surface);
2981
2982     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2983     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2984 }
2985
2986 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2987 {
2988     TRACE("surface %p.\n", surface);
2989
2990     surface->flags &= ~SFLAG_LOST;
2991     return WINED3D_OK;
2992 }
2993
2994 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2995 {
2996     TRACE("surface %p, palette %p.\n", surface, palette);
2997
2998     if (surface->palette == palette)
2999     {
3000         TRACE("Nop palette change.\n");
3001         return WINED3D_OK;
3002     }
3003
3004     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3005         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3006
3007     surface->palette = palette;
3008
3009     if (palette)
3010     {
3011         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3012             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3013
3014         surface->surface_ops->surface_realize_palette(surface);
3015     }
3016
3017     return WINED3D_OK;
3018 }
3019
3020 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3021         DWORD flags, const WINEDDCOLORKEY *color_key)
3022 {
3023     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3024
3025     if (flags & WINEDDCKEY_COLORSPACE)
3026     {
3027         FIXME(" colorkey value not supported (%08x) !\n", flags);
3028         return WINED3DERR_INVALIDCALL;
3029     }
3030
3031     /* Dirtify the surface, but only if a key was changed. */
3032     if (color_key)
3033     {
3034         switch (flags & ~WINEDDCKEY_COLORSPACE)
3035         {
3036             case WINEDDCKEY_DESTBLT:
3037                 surface->DestBltCKey = *color_key;
3038                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3039                 break;
3040
3041             case WINEDDCKEY_DESTOVERLAY:
3042                 surface->DestOverlayCKey = *color_key;
3043                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3044                 break;
3045
3046             case WINEDDCKEY_SRCOVERLAY:
3047                 surface->SrcOverlayCKey = *color_key;
3048                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3049                 break;
3050
3051             case WINEDDCKEY_SRCBLT:
3052                 surface->SrcBltCKey = *color_key;
3053                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3054                 break;
3055         }
3056     }
3057     else
3058     {
3059         switch (flags & ~WINEDDCKEY_COLORSPACE)
3060         {
3061             case WINEDDCKEY_DESTBLT:
3062                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3063                 break;
3064
3065             case WINEDDCKEY_DESTOVERLAY:
3066                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3067                 break;
3068
3069             case WINEDDCKEY_SRCOVERLAY:
3070                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3071                 break;
3072
3073             case WINEDDCKEY_SRCBLT:
3074                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3075                 break;
3076         }
3077     }
3078
3079     return WINED3D_OK;
3080 }
3081
3082 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3083 {
3084     TRACE("surface %p.\n", surface);
3085
3086     return surface->palette;
3087 }
3088
3089 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3090 {
3091     const struct wined3d_format *format = surface->resource.format;
3092     DWORD pitch;
3093
3094     TRACE("surface %p.\n", surface);
3095
3096     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3097     {
3098         /* Since compressed formats are block based, pitch means the amount of
3099          * bytes to the next row of block rather than the next row of pixels. */
3100         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3101         pitch = row_block_count * format->block_byte_count;
3102     }
3103     else
3104     {
3105         unsigned char alignment = surface->resource.device->surface_alignment;
3106         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3107         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3108     }
3109
3110     TRACE("Returning %u.\n", pitch);
3111
3112     return pitch;
3113 }
3114
3115 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3116 {
3117     TRACE("surface %p, mem %p.\n", surface, mem);
3118
3119     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3120     {
3121         WARN("Surface is locked or the DC is in use.\n");
3122         return WINED3DERR_INVALIDCALL;
3123     }
3124
3125     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3126     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3127     {
3128         ERR("Not supported on render targets.\n");
3129         return WINED3DERR_INVALIDCALL;
3130     }
3131
3132     if (mem && mem != surface->resource.allocatedMemory)
3133     {
3134         void *release = NULL;
3135
3136         /* Do I have to copy the old surface content? */
3137         if (surface->flags & SFLAG_DIBSECTION)
3138         {
3139             SelectObject(surface->hDC, surface->dib.holdbitmap);
3140             DeleteDC(surface->hDC);
3141             /* Release the DIB section. */
3142             DeleteObject(surface->dib.DIBsection);
3143             surface->dib.bitmap_data = NULL;
3144             surface->resource.allocatedMemory = NULL;
3145             surface->hDC = NULL;
3146             surface->flags &= ~SFLAG_DIBSECTION;
3147         }
3148         else if (!(surface->flags & SFLAG_USERPTR))
3149         {
3150             release = surface->resource.heapMemory;
3151             surface->resource.heapMemory = NULL;
3152         }
3153         surface->resource.allocatedMemory = mem;
3154         surface->flags |= SFLAG_USERPTR;
3155
3156         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3157         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3158
3159         /* For client textures OpenGL has to be notified. */
3160         if (surface->flags & SFLAG_CLIENT)
3161             surface_release_client_storage(surface);
3162
3163         /* Now free the old memory if any. */
3164         HeapFree(GetProcessHeap(), 0, release);
3165     }
3166     else if (surface->flags & SFLAG_USERPTR)
3167     {
3168         /* HeapMemory should be NULL already. */
3169         if (surface->resource.heapMemory)
3170             ERR("User pointer surface has heap memory allocated.\n");
3171
3172         if (!mem)
3173         {
3174             surface->resource.allocatedMemory = NULL;
3175             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3176
3177             if (surface->flags & SFLAG_CLIENT)
3178                 surface_release_client_storage(surface);
3179
3180             surface_prepare_system_memory(surface);
3181         }
3182
3183         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3184     }
3185
3186     return WINED3D_OK;
3187 }
3188
3189 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3190 {
3191     LONG w, h;
3192
3193     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3194
3195     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3196     {
3197         WARN("Not an overlay surface.\n");
3198         return WINEDDERR_NOTAOVERLAYSURFACE;
3199     }
3200
3201     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3202     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3203     surface->overlay_destrect.left = x;
3204     surface->overlay_destrect.top = y;
3205     surface->overlay_destrect.right = x + w;
3206     surface->overlay_destrect.bottom = y + h;
3207
3208     surface->surface_ops->surface_draw_overlay(surface);
3209
3210     return WINED3D_OK;
3211 }
3212
3213 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3214 {
3215     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3216
3217     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3218     {
3219         TRACE("Not an overlay surface.\n");
3220         return WINEDDERR_NOTAOVERLAYSURFACE;
3221     }
3222
3223     if (!surface->overlay_dest)
3224     {
3225         TRACE("Overlay not visible.\n");
3226         *x = 0;
3227         *y = 0;
3228         return WINEDDERR_OVERLAYNOTVISIBLE;
3229     }
3230
3231     *x = surface->overlay_destrect.left;
3232     *y = surface->overlay_destrect.top;
3233
3234     TRACE("Returning position %d, %d.\n", *x, *y);
3235
3236     return WINED3D_OK;
3237 }
3238
3239 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3240         DWORD flags, struct wined3d_surface *ref)
3241 {
3242     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3243
3244     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3245     {
3246         TRACE("Not an overlay surface.\n");
3247         return WINEDDERR_NOTAOVERLAYSURFACE;
3248     }
3249
3250     return WINED3D_OK;
3251 }
3252
3253 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3254         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3255 {
3256     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3257             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3258
3259     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3260     {
3261         WARN("Not an overlay surface.\n");
3262         return WINEDDERR_NOTAOVERLAYSURFACE;
3263     }
3264     else if (!dst_surface)
3265     {
3266         WARN("Dest surface is NULL.\n");
3267         return WINED3DERR_INVALIDCALL;
3268     }
3269
3270     if (src_rect)
3271     {
3272         surface->overlay_srcrect = *src_rect;
3273     }
3274     else
3275     {
3276         surface->overlay_srcrect.left = 0;
3277         surface->overlay_srcrect.top = 0;
3278         surface->overlay_srcrect.right = surface->resource.width;
3279         surface->overlay_srcrect.bottom = surface->resource.height;
3280     }
3281
3282     if (dst_rect)
3283     {
3284         surface->overlay_destrect = *dst_rect;
3285     }
3286     else
3287     {
3288         surface->overlay_destrect.left = 0;
3289         surface->overlay_destrect.top = 0;
3290         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3291         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3292     }
3293
3294     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3295     {
3296         surface->overlay_dest = NULL;
3297         list_remove(&surface->overlay_entry);
3298     }
3299
3300     if (flags & WINEDDOVER_SHOW)
3301     {
3302         if (surface->overlay_dest != dst_surface)
3303         {
3304             surface->overlay_dest = dst_surface;
3305             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3306         }
3307     }
3308     else if (flags & WINEDDOVER_HIDE)
3309     {
3310         /* tests show that the rectangles are erased on hide */
3311         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3312         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3313         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3314         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3315         surface->overlay_dest = NULL;
3316     }
3317
3318     surface->surface_ops->surface_draw_overlay(surface);
3319
3320     return WINED3D_OK;
3321 }
3322
3323 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3324 {
3325     TRACE("surface %p, clipper %p.\n", surface, clipper);
3326
3327     surface->clipper = clipper;
3328
3329     return WINED3D_OK;
3330 }
3331
3332 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3333 {
3334     TRACE("surface %p.\n", surface);
3335
3336     return surface->clipper;
3337 }
3338
3339 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3340 {
3341     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3342
3343     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3344
3345     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3346     {
3347         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3348         return WINED3DERR_INVALIDCALL;
3349     }
3350
3351     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3352             surface->pow2Width, surface->pow2Height);
3353     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3354     surface->resource.format = format;
3355
3356     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3357     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3358             format->glFormat, format->glInternal, format->glType);
3359
3360     return WINED3D_OK;
3361 }
3362
3363 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3364         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3365 {
3366     unsigned short *dst_s;
3367     const float *src_f;
3368     unsigned int x, y;
3369
3370     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3371
3372     for (y = 0; y < h; ++y)
3373     {
3374         src_f = (const float *)(src + y * pitch_in);
3375         dst_s = (unsigned short *) (dst + y * pitch_out);
3376         for (x = 0; x < w; ++x)
3377         {
3378             dst_s[x] = float_32_to_16(src_f + x);
3379         }
3380     }
3381 }
3382
3383 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3384         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3385 {
3386     static const unsigned char convert_5to8[] =
3387     {
3388         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3389         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3390         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3391         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3392     };
3393     static const unsigned char convert_6to8[] =
3394     {
3395         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3396         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3397         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3398         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3399         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3400         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3401         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3402         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3403     };
3404     unsigned int x, y;
3405
3406     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3407
3408     for (y = 0; y < h; ++y)
3409     {
3410         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3411         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3412         for (x = 0; x < w; ++x)
3413         {
3414             WORD pixel = src_line[x];
3415             dst_line[x] = 0xff000000
3416                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3417                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3418                     | convert_5to8[(pixel & 0x001f)];
3419         }
3420     }
3421 }
3422
3423 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3424  * in both cases we're just setting the X / Alpha channel to 0xff. */
3425 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3426         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3427 {
3428     unsigned int x, y;
3429
3430     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3431
3432     for (y = 0; y < h; ++y)
3433     {
3434         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3435         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3436
3437         for (x = 0; x < w; ++x)
3438         {
3439             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3440         }
3441     }
3442 }
3443
3444 static inline BYTE cliptobyte(int x)
3445 {
3446     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3447 }
3448
3449 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3450         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3451 {
3452     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3453     unsigned int x, y;
3454
3455     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3456
3457     for (y = 0; y < h; ++y)
3458     {
3459         const BYTE *src_line = src + y * pitch_in;
3460         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3461         for (x = 0; x < w; ++x)
3462         {
3463             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3464              *     C = Y - 16; D = U - 128; E = V - 128;
3465              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3466              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3467              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3468              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3469              * U and V are shared between the pixels. */
3470             if (!(x & 1)) /* For every even pixel, read new U and V. */
3471             {
3472                 d = (int) src_line[1] - 128;
3473                 e = (int) src_line[3] - 128;
3474                 r2 = 409 * e + 128;
3475                 g2 = - 100 * d - 208 * e + 128;
3476                 b2 = 516 * d + 128;
3477             }
3478             c2 = 298 * ((int) src_line[0] - 16);
3479             dst_line[x] = 0xff000000
3480                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3481                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3482                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3483                 /* Scale RGB values to 0..255 range,
3484                  * then clip them if still not in range (may be negative),
3485                  * then shift them within DWORD if necessary. */
3486             src_line += 2;
3487         }
3488     }
3489 }
3490
3491 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3492         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3493 {
3494     unsigned int x, y;
3495     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3496
3497     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3498
3499     for (y = 0; y < h; ++y)
3500     {
3501         const BYTE *src_line = src + y * pitch_in;
3502         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3503         for (x = 0; x < w; ++x)
3504         {
3505             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3506              *     C = Y - 16; D = U - 128; E = V - 128;
3507              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3508              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3509              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3510              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3511              * U and V are shared between the pixels. */
3512             if (!(x & 1)) /* For every even pixel, read new U and V. */
3513             {
3514                 d = (int) src_line[1] - 128;
3515                 e = (int) src_line[3] - 128;
3516                 r2 = 409 * e + 128;
3517                 g2 = - 100 * d - 208 * e + 128;
3518                 b2 = 516 * d + 128;
3519             }
3520             c2 = 298 * ((int) src_line[0] - 16);
3521             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3522                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3523                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3524                 /* Scale RGB values to 0..255 range,
3525                  * then clip them if still not in range (may be negative),
3526                  * then shift them within DWORD if necessary. */
3527             src_line += 2;
3528         }
3529     }
3530 }
3531
3532 struct d3dfmt_convertor_desc
3533 {
3534     enum wined3d_format_id from, to;
3535     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3536 };
3537
3538 static const struct d3dfmt_convertor_desc convertors[] =
3539 {
3540     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3541     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3542     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3543     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3544     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3545     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3546 };
3547
3548 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3549         enum wined3d_format_id to)
3550 {
3551     unsigned int i;
3552
3553     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3554     {
3555         if (convertors[i].from == from && convertors[i].to == to)
3556             return &convertors[i];
3557     }
3558
3559     return NULL;
3560 }
3561
3562 /*****************************************************************************
3563  * surface_convert_format
3564  *
3565  * Creates a duplicate of a surface in a different format. Is used by Blt to
3566  * blit between surfaces with different formats.
3567  *
3568  * Parameters
3569  *  source: Source surface
3570  *  fmt: Requested destination format
3571  *
3572  *****************************************************************************/
3573 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3574 {
3575     const struct d3dfmt_convertor_desc *conv;
3576     WINED3DLOCKED_RECT lock_src, lock_dst;
3577     struct wined3d_surface *ret = NULL;
3578     HRESULT hr;
3579
3580     conv = find_convertor(source->resource.format->id, to_fmt);
3581     if (!conv)
3582     {
3583         FIXME("Cannot find a conversion function from format %s to %s.\n",
3584                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3585         return NULL;
3586     }
3587
3588     wined3d_surface_create(source->resource.device, source->resource.width,
3589             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3590             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3591             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3592     if (!ret)
3593     {
3594         ERR("Failed to create a destination surface for conversion.\n");
3595         return NULL;
3596     }
3597
3598     memset(&lock_src, 0, sizeof(lock_src));
3599     memset(&lock_dst, 0, sizeof(lock_dst));
3600
3601     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3602     if (FAILED(hr))
3603     {
3604         ERR("Failed to lock the source surface.\n");
3605         wined3d_surface_decref(ret);
3606         return NULL;
3607     }
3608     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3609     if (FAILED(hr))
3610     {
3611         ERR("Failed to lock the destination surface.\n");
3612         wined3d_surface_unmap(source);
3613         wined3d_surface_decref(ret);
3614         return NULL;
3615     }
3616
3617     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3618             source->resource.width, source->resource.height);
3619
3620     wined3d_surface_unmap(ret);
3621     wined3d_surface_unmap(source);
3622
3623     return ret;
3624 }
3625
3626 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3627         unsigned int bpp, UINT pitch, DWORD color)
3628 {
3629     BYTE *first;
3630     int x, y;
3631
3632     /* Do first row */
3633
3634 #define COLORFILL_ROW(type) \
3635 do { \
3636     type *d = (type *)buf; \
3637     for (x = 0; x < width; ++x) \
3638         d[x] = (type)color; \
3639 } while(0)
3640
3641     switch (bpp)
3642     {
3643         case 1:
3644             COLORFILL_ROW(BYTE);
3645             break;
3646
3647         case 2:
3648             COLORFILL_ROW(WORD);
3649             break;
3650
3651         case 3:
3652         {
3653             BYTE *d = buf;
3654             for (x = 0; x < width; ++x, d += 3)
3655             {
3656                 d[0] = (color      ) & 0xFF;
3657                 d[1] = (color >>  8) & 0xFF;
3658                 d[2] = (color >> 16) & 0xFF;
3659             }
3660             break;
3661         }
3662         case 4:
3663             COLORFILL_ROW(DWORD);
3664             break;
3665
3666         default:
3667             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3668             return WINED3DERR_NOTAVAILABLE;
3669     }
3670
3671 #undef COLORFILL_ROW
3672
3673     /* Now copy first row. */
3674     first = buf;
3675     for (y = 1; y < height; ++y)
3676     {
3677         buf += pitch;
3678         memcpy(buf, first, width * bpp);
3679     }
3680
3681     return WINED3D_OK;
3682 }
3683
3684 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3685 {
3686     TRACE("surface %p.\n", surface);
3687
3688     if (!(surface->flags & SFLAG_LOCKED))
3689     {
3690         WARN("Trying to unmap unmapped surface.\n");
3691         return WINEDDERR_NOTLOCKED;
3692     }
3693     surface->flags &= ~SFLAG_LOCKED;
3694
3695     surface->surface_ops->surface_unmap(surface);
3696
3697     return WINED3D_OK;
3698 }
3699
3700 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3701         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3702 {
3703     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3704             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3705
3706     if (surface->flags & SFLAG_LOCKED)
3707     {
3708         WARN("Surface is already mapped.\n");
3709         return WINED3DERR_INVALIDCALL;
3710     }
3711     surface->flags |= SFLAG_LOCKED;
3712
3713     if (!(surface->flags & SFLAG_LOCKABLE))
3714         WARN("Trying to lock unlockable surface.\n");
3715
3716     surface->surface_ops->surface_map(surface, rect, flags);
3717
3718     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3719
3720     if (!rect)
3721     {
3722         locked_rect->pBits = surface->resource.allocatedMemory;
3723         surface->lockedRect.left = 0;
3724         surface->lockedRect.top = 0;
3725         surface->lockedRect.right = surface->resource.width;
3726         surface->lockedRect.bottom = surface->resource.height;
3727     }
3728     else
3729     {
3730         const struct wined3d_format *format = surface->resource.format;
3731
3732         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3733         {
3734             /* Compressed textures are block based, so calculate the offset of
3735              * the block that contains the top-left pixel of the locked rectangle. */
3736             locked_rect->pBits = surface->resource.allocatedMemory
3737                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3738                     + ((rect->left / format->block_width) * format->block_byte_count);
3739         }
3740         else
3741         {
3742             locked_rect->pBits = surface->resource.allocatedMemory
3743                     + (locked_rect->Pitch * rect->top)
3744                     + (rect->left * format->byte_count);
3745         }
3746         surface->lockedRect.left = rect->left;
3747         surface->lockedRect.top = rect->top;
3748         surface->lockedRect.right = rect->right;
3749         surface->lockedRect.bottom = rect->bottom;
3750     }
3751
3752     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3753     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3754
3755     return WINED3D_OK;
3756 }
3757
3758 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3759 {
3760     HRESULT hr;
3761
3762     TRACE("surface %p, dc %p.\n", surface, dc);
3763
3764     if (surface->flags & SFLAG_USERPTR)
3765     {
3766         ERR("Not supported on surfaces with application-provided memory.\n");
3767         return WINEDDERR_NODC;
3768     }
3769
3770     /* Give more detailed info for ddraw. */
3771     if (surface->flags & SFLAG_DCINUSE)
3772         return WINEDDERR_DCALREADYCREATED;
3773
3774     /* Can't GetDC if the surface is locked. */
3775     if (surface->flags & SFLAG_LOCKED)
3776         return WINED3DERR_INVALIDCALL;
3777
3778     hr = surface->surface_ops->surface_getdc(surface);
3779     if (FAILED(hr))
3780         return hr;
3781
3782     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3783             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3784     {
3785         /* GetDC on palettized formats is unsupported in D3D9, and the method
3786          * is missing in D3D8, so this should only be used for DX <=7
3787          * surfaces (with non-device palettes). */
3788         const PALETTEENTRY *pal = NULL;
3789
3790         if (surface->palette)
3791         {
3792             pal = surface->palette->palents;
3793         }
3794         else
3795         {
3796             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3797             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3798
3799             if (dds_primary && dds_primary->palette)
3800                 pal = dds_primary->palette->palents;
3801         }
3802
3803         if (pal)
3804         {
3805             RGBQUAD col[256];
3806             unsigned int i;
3807
3808             for (i = 0; i < 256; ++i)
3809             {
3810                 col[i].rgbRed = pal[i].peRed;
3811                 col[i].rgbGreen = pal[i].peGreen;
3812                 col[i].rgbBlue = pal[i].peBlue;
3813                 col[i].rgbReserved = 0;
3814             }
3815             SetDIBColorTable(surface->hDC, 0, 256, col);
3816         }
3817     }
3818
3819     surface->flags |= SFLAG_DCINUSE;
3820
3821     *dc = surface->hDC;
3822     TRACE("Returning dc %p.\n", *dc);
3823
3824     return WINED3D_OK;
3825 }
3826
3827 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3828 {
3829     TRACE("surface %p, dc %p.\n", surface, dc);
3830
3831     if (!(surface->flags & SFLAG_DCINUSE))
3832         return WINEDDERR_NODC;
3833
3834     if (surface->hDC != dc)
3835     {
3836         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3837                 dc, surface->hDC);
3838         return WINEDDERR_NODC;
3839     }
3840
3841     /* Copy the contents of the DIB over to the PBO. */
3842     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3843         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3844
3845     /* We locked first, so unlock now. */
3846     wined3d_surface_unmap(surface);
3847
3848     surface->flags &= ~SFLAG_DCINUSE;
3849
3850     return WINED3D_OK;
3851 }
3852
3853 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3854 {
3855     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3856
3857     if (flags)
3858     {
3859         static UINT once;
3860         if (!once++)
3861             FIXME("Ignoring flags %#x.\n", flags);
3862         else
3863             WARN("Ignoring flags %#x.\n", flags);
3864     }
3865
3866     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3867     {
3868         ERR("Not supported on swapchain surfaces.\n");
3869         return WINEDDERR_NOTFLIPPABLE;
3870     }
3871
3872     /* Flipping is only supported on render targets and overlays. */
3873     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3874     {
3875         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3876         return WINEDDERR_NOTFLIPPABLE;
3877     }
3878
3879     flip_surface(surface, override);
3880
3881     /* Update overlays if they're visible. */
3882     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3883         return surface->surface_ops->surface_draw_overlay(surface);
3884
3885     return WINED3D_OK;
3886 }
3887
3888 /* Do not call while under the GL lock. */
3889 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3890 {
3891     struct wined3d_device *device = surface->resource.device;
3892
3893     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3894
3895     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3896     {
3897         struct wined3d_texture *texture = surface->container.u.texture;
3898
3899         TRACE("Passing to container (%p).\n", texture);
3900         texture->texture_ops->texture_preload(texture, srgb);
3901     }
3902     else
3903     {
3904         struct wined3d_context *context;
3905
3906         TRACE("(%p) : About to load surface\n", surface);
3907
3908         /* TODO: Use already acquired context when possible. */
3909         context = context_acquire(device, NULL);
3910
3911         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3912
3913         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3914         {
3915             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3916             GLclampf tmp;
3917             tmp = 0.9f;
3918             ENTER_GL();
3919             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3920             LEAVE_GL();
3921         }
3922
3923         context_release(context);
3924     }
3925 }
3926
3927 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3928 {
3929     if (!surface->resource.allocatedMemory)
3930     {
3931         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3932                 surface->resource.size + RESOURCE_ALIGNMENT);
3933         if (!surface->resource.heapMemory)
3934         {
3935             ERR("Out of memory\n");
3936             return FALSE;
3937         }
3938         surface->resource.allocatedMemory =
3939             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3940     }
3941     else
3942     {
3943         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3944     }
3945
3946     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3947
3948     return TRUE;
3949 }
3950
3951 /* Read the framebuffer back into the surface */
3952 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3953 {
3954     struct wined3d_device *device = surface->resource.device;
3955     const struct wined3d_gl_info *gl_info;
3956     struct wined3d_context *context;
3957     BYTE *mem;
3958     GLint fmt;
3959     GLint type;
3960     BYTE *row, *top, *bottom;
3961     int i;
3962     BOOL bpp;
3963     RECT local_rect;
3964     BOOL srcIsUpsideDown;
3965     GLint rowLen = 0;
3966     GLint skipPix = 0;
3967     GLint skipRow = 0;
3968
3969     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3970         static BOOL warned = FALSE;
3971         if(!warned) {
3972             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3973             warned = TRUE;
3974         }
3975         return;
3976     }
3977
3978     context = context_acquire(device, surface);
3979     context_apply_blit_state(context, device);
3980     gl_info = context->gl_info;
3981
3982     ENTER_GL();
3983
3984     /* Select the correct read buffer, and give some debug output.
3985      * There is no need to keep track of the current read buffer or reset it, every part of the code
3986      * that reads sets the read buffer as desired.
3987      */
3988     if (surface_is_offscreen(surface))
3989     {
3990         /* Mapping the primary render target which is not on a swapchain.
3991          * Read from the back buffer. */
3992         TRACE("Mapping offscreen render target.\n");
3993         glReadBuffer(device->offscreenBuffer);
3994         srcIsUpsideDown = TRUE;
3995     }
3996     else
3997     {
3998         /* Onscreen surfaces are always part of a swapchain */
3999         GLenum buffer = surface_get_gl_buffer(surface);
4000         TRACE("Mapping %#x buffer.\n", buffer);
4001         glReadBuffer(buffer);
4002         checkGLcall("glReadBuffer");
4003         srcIsUpsideDown = FALSE;
4004     }
4005
4006     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4007     if (!rect)
4008     {
4009         local_rect.left = 0;
4010         local_rect.top = 0;
4011         local_rect.right = surface->resource.width;
4012         local_rect.bottom = surface->resource.height;
4013     }
4014     else
4015     {
4016         local_rect = *rect;
4017     }
4018     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4019
4020     switch (surface->resource.format->id)
4021     {
4022         case WINED3DFMT_P8_UINT:
4023         {
4024             if (primary_render_target_is_p8(device))
4025             {
4026                 /* In case of P8 render targets the index is stored in the alpha component */
4027                 fmt = GL_ALPHA;
4028                 type = GL_UNSIGNED_BYTE;
4029                 mem = dest;
4030                 bpp = surface->resource.format->byte_count;
4031             }
4032             else
4033             {
4034                 /* GL can't return palettized data, so read ARGB pixels into a
4035                  * separate block of memory and convert them into palettized format
4036                  * in software. Slow, but if the app means to use palettized render
4037                  * targets and locks it...
4038                  *
4039                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4040                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4041                  * for the color channels when palettizing the colors.
4042                  */
4043                 fmt = GL_RGB;
4044                 type = GL_UNSIGNED_BYTE;
4045                 pitch *= 3;
4046                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4047                 if (!mem)
4048                 {
4049                     ERR("Out of memory\n");
4050                     LEAVE_GL();
4051                     return;
4052                 }
4053                 bpp = surface->resource.format->byte_count * 3;
4054             }
4055         }
4056         break;
4057
4058         default:
4059             mem = dest;
4060             fmt = surface->resource.format->glFormat;
4061             type = surface->resource.format->glType;
4062             bpp = surface->resource.format->byte_count;
4063     }
4064
4065     if (surface->flags & SFLAG_PBO)
4066     {
4067         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4068         checkGLcall("glBindBufferARB");
4069         if (mem)
4070         {
4071             ERR("mem not null for pbo -- unexpected\n");
4072             mem = NULL;
4073         }
4074     }
4075
4076     /* Save old pixel store pack state */
4077     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4078     checkGLcall("glGetIntegerv");
4079     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4080     checkGLcall("glGetIntegerv");
4081     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4082     checkGLcall("glGetIntegerv");
4083
4084     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4085     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4086     checkGLcall("glPixelStorei");
4087     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4088     checkGLcall("glPixelStorei");
4089     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4090     checkGLcall("glPixelStorei");
4091
4092     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4093             local_rect.right - local_rect.left,
4094             local_rect.bottom - local_rect.top,
4095             fmt, type, mem);
4096     checkGLcall("glReadPixels");
4097
4098     /* Reset previous pixel store pack state */
4099     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4100     checkGLcall("glPixelStorei");
4101     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4102     checkGLcall("glPixelStorei");
4103     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4104     checkGLcall("glPixelStorei");
4105
4106     if (surface->flags & SFLAG_PBO)
4107     {
4108         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4109         checkGLcall("glBindBufferARB");
4110
4111         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4112          * to get a pointer to it and perform the flipping in software. This is a lot
4113          * faster than calling glReadPixels for each line. In case we want more speed
4114          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4115         if (!srcIsUpsideDown)
4116         {
4117             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4118             checkGLcall("glBindBufferARB");
4119
4120             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4121             checkGLcall("glMapBufferARB");
4122         }
4123     }
4124
4125     /* TODO: Merge this with the palettization loop below for P8 targets */
4126     if(!srcIsUpsideDown) {
4127         UINT len, off;
4128         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4129             Flip the lines in software */
4130         len = (local_rect.right - local_rect.left) * bpp;
4131         off = local_rect.left * bpp;
4132
4133         row = HeapAlloc(GetProcessHeap(), 0, len);
4134         if(!row) {
4135             ERR("Out of memory\n");
4136             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4137                 HeapFree(GetProcessHeap(), 0, mem);
4138             LEAVE_GL();
4139             return;
4140         }
4141
4142         top = mem + pitch * local_rect.top;
4143         bottom = mem + pitch * (local_rect.bottom - 1);
4144         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4145             memcpy(row, top + off, len);
4146             memcpy(top + off, bottom + off, len);
4147             memcpy(bottom + off, row, len);
4148             top += pitch;
4149             bottom -= pitch;
4150         }
4151         HeapFree(GetProcessHeap(), 0, row);
4152
4153         /* Unmap the temp PBO buffer */
4154         if (surface->flags & SFLAG_PBO)
4155         {
4156             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4157             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4158         }
4159     }
4160
4161     LEAVE_GL();
4162     context_release(context);
4163
4164     /* For P8 textures we need to perform an inverse palette lookup. This is
4165      * done by searching for a palette index which matches the RGB value.
4166      * Note this isn't guaranteed to work when there are multiple entries for
4167      * the same color but we have no choice. In case of P8 render targets,
4168      * the index is stored in the alpha component so no conversion is needed. */
4169     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4170     {
4171         const PALETTEENTRY *pal = NULL;
4172         DWORD width = pitch / 3;
4173         int x, y, c;
4174
4175         if (surface->palette)
4176         {
4177             pal = surface->palette->palents;
4178         }
4179         else
4180         {
4181             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4182             HeapFree(GetProcessHeap(), 0, mem);
4183             return;
4184         }
4185
4186         for(y = local_rect.top; y < local_rect.bottom; y++) {
4187             for(x = local_rect.left; x < local_rect.right; x++) {
4188                 /*                      start              lines            pixels      */
4189                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4190                 const BYTE *green = blue  + 1;
4191                 const BYTE *red = green + 1;
4192
4193                 for(c = 0; c < 256; c++) {
4194                     if(*red   == pal[c].peRed   &&
4195                        *green == pal[c].peGreen &&
4196                        *blue  == pal[c].peBlue)
4197                     {
4198                         *((BYTE *) dest + y * width + x) = c;
4199                         break;
4200                     }
4201                 }
4202             }
4203         }
4204         HeapFree(GetProcessHeap(), 0, mem);
4205     }
4206 }
4207
4208 /* Read the framebuffer contents into a texture */
4209 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
4210 {
4211     struct wined3d_device *device = surface->resource.device;
4212     struct wined3d_context *context;
4213
4214     if (!surface_is_offscreen(surface))
4215     {
4216         /* We would need to flip onscreen surfaces, but there's no efficient
4217          * way to do that here. It makes more sense for the caller to
4218          * explicitly go through sysmem. */
4219         ERR("Not supported for onscreen targets.\n");
4220         return;
4221     }
4222
4223     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4224      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4225      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4226      */
4227     context = context_acquire(device, surface);
4228     device_invalidate_state(device, STATE_FRAMEBUFFER);
4229
4230     surface_prepare_texture(surface, context, srgb);
4231     surface_bind_and_dirtify(surface, context, srgb);
4232
4233     TRACE("Reading back offscreen render target %p.\n", surface);
4234
4235     ENTER_GL();
4236
4237     glReadBuffer(device->offscreenBuffer);
4238     checkGLcall("glReadBuffer");
4239
4240     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4241             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4242     checkGLcall("glCopyTexSubImage2D");
4243
4244     LEAVE_GL();
4245
4246     context_release(context);
4247 }
4248
4249 /* Context activation is done by the caller. */
4250 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4251         struct wined3d_context *context, BOOL srgb)
4252 {
4253     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4254     CONVERT_TYPES convert;
4255     struct wined3d_format format;
4256
4257     if (surface->flags & alloc_flag) return;
4258
4259     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4260     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4261     else surface->flags &= ~SFLAG_CONVERTED;
4262
4263     surface_bind_and_dirtify(surface, context, srgb);
4264     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4265     surface->flags |= alloc_flag;
4266 }
4267
4268 /* Context activation is done by the caller. */
4269 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4270 {
4271     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4272     {
4273         struct wined3d_texture *texture = surface->container.u.texture;
4274         UINT sub_count = texture->level_count * texture->layer_count;
4275         UINT i;
4276
4277         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4278
4279         for (i = 0; i < sub_count; ++i)
4280         {
4281             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4282             surface_prepare_texture_internal(s, context, srgb);
4283         }
4284
4285         return;
4286     }
4287
4288     surface_prepare_texture_internal(surface, context, srgb);
4289 }
4290
4291 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4292 {
4293     if (multisample)
4294     {
4295         if (surface->rb_multisample)
4296             return;
4297
4298         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4299         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4300         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4301                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4302         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4303     }
4304     else
4305     {
4306         if (surface->rb_resolved)
4307             return;
4308
4309         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4310         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4311         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4312                 surface->pow2Width, surface->pow2Height);
4313         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4314     }
4315 }
4316
4317 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4318         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4319 {
4320     struct wined3d_device *device = surface->resource.device;
4321     UINT pitch = wined3d_surface_get_pitch(surface);
4322     const struct wined3d_gl_info *gl_info;
4323     struct wined3d_context *context;
4324     RECT local_rect;
4325     UINT w, h;
4326
4327     surface_get_rect(surface, rect, &local_rect);
4328
4329     mem += local_rect.top * pitch + local_rect.left * bpp;
4330     w = local_rect.right - local_rect.left;
4331     h = local_rect.bottom - local_rect.top;
4332
4333     /* Activate the correct context for the render target */
4334     context = context_acquire(device, surface);
4335     context_apply_blit_state(context, device);
4336     gl_info = context->gl_info;
4337
4338     ENTER_GL();
4339
4340     if (!surface_is_offscreen(surface))
4341     {
4342         GLenum buffer = surface_get_gl_buffer(surface);
4343         TRACE("Unlocking %#x buffer.\n", buffer);
4344         context_set_draw_buffer(context, buffer);
4345
4346         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4347         glPixelZoom(1.0f, -1.0f);
4348     }
4349     else
4350     {
4351         /* Primary offscreen render target */
4352         TRACE("Offscreen render target.\n");
4353         context_set_draw_buffer(context, device->offscreenBuffer);
4354
4355         glPixelZoom(1.0f, 1.0f);
4356     }
4357
4358     glRasterPos3i(local_rect.left, local_rect.top, 1);
4359     checkGLcall("glRasterPos3i");
4360
4361     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4362     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4363
4364     if (surface->flags & SFLAG_PBO)
4365     {
4366         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4367         checkGLcall("glBindBufferARB");
4368     }
4369
4370     glDrawPixels(w, h, fmt, type, mem);
4371     checkGLcall("glDrawPixels");
4372
4373     if (surface->flags & SFLAG_PBO)
4374     {
4375         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4376         checkGLcall("glBindBufferARB");
4377     }
4378
4379     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4380     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4381
4382     LEAVE_GL();
4383
4384     if (wined3d_settings.strict_draw_ordering
4385             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4386             && surface->container.u.swapchain->front_buffer == surface))
4387         wglFlush();
4388
4389     context_release(context);
4390 }
4391
4392 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4393         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4394 {
4395     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4396     const struct wined3d_device *device = surface->resource.device;
4397     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4398     BOOL blit_supported = FALSE;
4399
4400     /* Copy the default values from the surface. Below we might perform fixups */
4401     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4402     *format = *surface->resource.format;
4403     *convert = NO_CONVERSION;
4404
4405     /* Ok, now look if we have to do any conversion */
4406     switch (surface->resource.format->id)
4407     {
4408         case WINED3DFMT_P8_UINT:
4409             /* Below the call to blit_supported is disabled for Wine 1.2
4410              * because the function isn't operating correctly yet. At the
4411              * moment 8-bit blits are handled in software and if certain GL
4412              * extensions are around, surface conversion is performed at
4413              * upload time. The blit_supported call recognizes it as a
4414              * destination fixup. This type of upload 'fixup' and 8-bit to
4415              * 8-bit blits need to be handled by the blit_shader.
4416              * TODO: get rid of this #if 0. */
4417 #if 0
4418             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4419                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4420                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4421 #endif
4422             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4423
4424             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4425              * texturing. Further also use conversion in case of color keying.
4426              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4427              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4428              * conflicts with this.
4429              */
4430             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4431                     || colorkey_active || !use_texturing)
4432             {
4433                 format->glFormat = GL_RGBA;
4434                 format->glInternal = GL_RGBA;
4435                 format->glType = GL_UNSIGNED_BYTE;
4436                 format->conv_byte_count = 4;
4437                 if (colorkey_active)
4438                     *convert = CONVERT_PALETTED_CK;
4439                 else
4440                     *convert = CONVERT_PALETTED;
4441             }
4442             break;
4443
4444         case WINED3DFMT_B2G3R3_UNORM:
4445             /* **********************
4446                 GL_UNSIGNED_BYTE_3_3_2
4447                 ********************** */
4448             if (colorkey_active) {
4449                 /* This texture format will never be used.. So do not care about color keying
4450                     up until the point in time it will be needed :-) */
4451                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4452             }
4453             break;
4454
4455         case WINED3DFMT_B5G6R5_UNORM:
4456             if (colorkey_active)
4457             {
4458                 *convert = CONVERT_CK_565;
4459                 format->glFormat = GL_RGBA;
4460                 format->glInternal = GL_RGB5_A1;
4461                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4462                 format->conv_byte_count = 2;
4463             }
4464             break;
4465
4466         case WINED3DFMT_B5G5R5X1_UNORM:
4467             if (colorkey_active)
4468             {
4469                 *convert = CONVERT_CK_5551;
4470                 format->glFormat = GL_BGRA;
4471                 format->glInternal = GL_RGB5_A1;
4472                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4473                 format->conv_byte_count = 2;
4474             }
4475             break;
4476
4477         case WINED3DFMT_B8G8R8_UNORM:
4478             if (colorkey_active)
4479             {
4480                 *convert = CONVERT_CK_RGB24;
4481                 format->glFormat = GL_RGBA;
4482                 format->glInternal = GL_RGBA8;
4483                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4484                 format->conv_byte_count = 4;
4485             }
4486             break;
4487
4488         case WINED3DFMT_B8G8R8X8_UNORM:
4489             if (colorkey_active)
4490             {
4491                 *convert = CONVERT_RGB32_888;
4492                 format->glFormat = GL_RGBA;
4493                 format->glInternal = GL_RGBA8;
4494                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4495                 format->conv_byte_count = 4;
4496             }
4497             break;
4498
4499         default:
4500             break;
4501     }
4502
4503     return WINED3D_OK;
4504 }
4505
4506 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4507 {
4508     const struct wined3d_device *device = surface->resource.device;
4509     const struct wined3d_palette *pal = surface->palette;
4510     BOOL index_in_alpha = FALSE;
4511     unsigned int i;
4512
4513     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4514      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4515      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4516      * duplicate entries. Store the color key in the unused alpha component to speed the
4517      * download up and to make conversion unneeded. */
4518     index_in_alpha = primary_render_target_is_p8(device);
4519
4520     if (!pal)
4521     {
4522         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4523         if (index_in_alpha)
4524         {
4525             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4526              * there's no palette at this time. */
4527             for (i = 0; i < 256; i++) table[i][3] = i;
4528         }
4529     }
4530     else
4531     {
4532         TRACE("Using surface palette %p\n", pal);
4533         /* Get the surface's palette */
4534         for (i = 0; i < 256; ++i)
4535         {
4536             table[i][0] = pal->palents[i].peRed;
4537             table[i][1] = pal->palents[i].peGreen;
4538             table[i][2] = pal->palents[i].peBlue;
4539
4540             /* When index_in_alpha is set the palette index is stored in the
4541              * alpha component. In case of a readback we can then read
4542              * GL_ALPHA. Color keying is handled in BltOverride using a
4543              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4544              * color key itself is passed to glAlphaFunc in other cases the
4545              * alpha component of pixels that should be masked away is set to 0. */
4546             if (index_in_alpha)
4547             {
4548                 table[i][3] = i;
4549             }
4550             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4551                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4552             {
4553                 table[i][3] = 0x00;
4554             }
4555             else if (pal->flags & WINEDDPCAPS_ALPHA)
4556             {
4557                 table[i][3] = pal->palents[i].peFlags;
4558             }
4559             else
4560             {
4561                 table[i][3] = 0xFF;
4562             }
4563         }
4564     }
4565 }
4566
4567 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4568         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4569 {
4570     const BYTE *source;
4571     BYTE *dest;
4572     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4573
4574     switch (convert) {
4575         case NO_CONVERSION:
4576         {
4577             memcpy(dst, src, pitch * height);
4578             break;
4579         }
4580         case CONVERT_PALETTED:
4581         case CONVERT_PALETTED_CK:
4582         {
4583             BYTE table[256][4];
4584             unsigned int x, y;
4585
4586             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4587
4588             for (y = 0; y < height; y++)
4589             {
4590                 source = src + pitch * y;
4591                 dest = dst + outpitch * y;
4592                 /* This is an 1 bpp format, using the width here is fine */
4593                 for (x = 0; x < width; x++) {
4594                     BYTE color = *source++;
4595                     *dest++ = table[color][0];
4596                     *dest++ = table[color][1];
4597                     *dest++ = table[color][2];
4598                     *dest++ = table[color][3];
4599                 }
4600             }
4601         }
4602         break;
4603
4604         case CONVERT_CK_565:
4605         {
4606             /* Converting the 565 format in 5551 packed to emulate color-keying.
4607
4608               Note : in all these conversion, it would be best to average the averaging
4609                       pixels to get the color of the pixel that will be color-keyed to
4610                       prevent 'color bleeding'. This will be done later on if ever it is
4611                       too visible.
4612
4613               Note2: Nvidia documents say that their driver does not support alpha + color keying
4614                      on the same surface and disables color keying in such a case
4615             */
4616             unsigned int x, y;
4617             const WORD *Source;
4618             WORD *Dest;
4619
4620             TRACE("Color keyed 565\n");
4621
4622             for (y = 0; y < height; y++) {
4623                 Source = (const WORD *)(src + y * pitch);
4624                 Dest = (WORD *) (dst + y * outpitch);
4625                 for (x = 0; x < width; x++ ) {
4626                     WORD color = *Source++;
4627                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4628                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4629                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4630                         *Dest |= 0x0001;
4631                     Dest++;
4632                 }
4633             }
4634         }
4635         break;
4636
4637         case CONVERT_CK_5551:
4638         {
4639             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4640             unsigned int x, y;
4641             const WORD *Source;
4642             WORD *Dest;
4643             TRACE("Color keyed 5551\n");
4644             for (y = 0; y < height; y++) {
4645                 Source = (const WORD *)(src + y * pitch);
4646                 Dest = (WORD *) (dst + y * outpitch);
4647                 for (x = 0; x < width; x++ ) {
4648                     WORD color = *Source++;
4649                     *Dest = color;
4650                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4651                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4652                         *Dest |= (1 << 15);
4653                     else
4654                         *Dest &= ~(1 << 15);
4655                     Dest++;
4656                 }
4657             }
4658         }
4659         break;
4660
4661         case CONVERT_CK_RGB24:
4662         {
4663             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4664             unsigned int x, y;
4665             for (y = 0; y < height; y++)
4666             {
4667                 source = src + pitch * y;
4668                 dest = dst + outpitch * y;
4669                 for (x = 0; x < width; x++) {
4670                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4671                     DWORD dstcolor = color << 8;
4672                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4673                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4674                         dstcolor |= 0xff;
4675                     *(DWORD*)dest = dstcolor;
4676                     source += 3;
4677                     dest += 4;
4678                 }
4679             }
4680         }
4681         break;
4682
4683         case CONVERT_RGB32_888:
4684         {
4685             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4686             unsigned int x, y;
4687             for (y = 0; y < height; y++)
4688             {
4689                 source = src + pitch * y;
4690                 dest = dst + outpitch * y;
4691                 for (x = 0; x < width; x++) {
4692                     DWORD color = 0xffffff & *(const DWORD*)source;
4693                     DWORD dstcolor = color << 8;
4694                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4695                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4696                         dstcolor |= 0xff;
4697                     *(DWORD*)dest = dstcolor;
4698                     source += 4;
4699                     dest += 4;
4700                 }
4701             }
4702         }
4703         break;
4704
4705         default:
4706             ERR("Unsupported conversion type %#x.\n", convert);
4707     }
4708     return WINED3D_OK;
4709 }
4710
4711 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4712 {
4713     /* Flip the surface contents */
4714     /* Flip the DC */
4715     {
4716         HDC tmp;
4717         tmp = front->hDC;
4718         front->hDC = back->hDC;
4719         back->hDC = tmp;
4720     }
4721
4722     /* Flip the DIBsection */
4723     {
4724         HBITMAP tmp;
4725         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4726         tmp = front->dib.DIBsection;
4727         front->dib.DIBsection = back->dib.DIBsection;
4728         back->dib.DIBsection = tmp;
4729
4730         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4731         else front->flags &= ~SFLAG_DIBSECTION;
4732         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4733         else back->flags &= ~SFLAG_DIBSECTION;
4734     }
4735
4736     /* Flip the surface data */
4737     {
4738         void* tmp;
4739
4740         tmp = front->dib.bitmap_data;
4741         front->dib.bitmap_data = back->dib.bitmap_data;
4742         back->dib.bitmap_data = tmp;
4743
4744         tmp = front->resource.allocatedMemory;
4745         front->resource.allocatedMemory = back->resource.allocatedMemory;
4746         back->resource.allocatedMemory = tmp;
4747
4748         tmp = front->resource.heapMemory;
4749         front->resource.heapMemory = back->resource.heapMemory;
4750         back->resource.heapMemory = tmp;
4751     }
4752
4753     /* Flip the PBO */
4754     {
4755         GLuint tmp_pbo = front->pbo;
4756         front->pbo = back->pbo;
4757         back->pbo = tmp_pbo;
4758     }
4759
4760     /* client_memory should not be different, but just in case */
4761     {
4762         BOOL tmp;
4763         tmp = front->dib.client_memory;
4764         front->dib.client_memory = back->dib.client_memory;
4765         back->dib.client_memory = tmp;
4766     }
4767
4768     /* Flip the opengl texture */
4769     {
4770         GLuint tmp;
4771
4772         tmp = back->texture_name;
4773         back->texture_name = front->texture_name;
4774         front->texture_name = tmp;
4775
4776         tmp = back->texture_name_srgb;
4777         back->texture_name_srgb = front->texture_name_srgb;
4778         front->texture_name_srgb = tmp;
4779
4780         tmp = back->rb_multisample;
4781         back->rb_multisample = front->rb_multisample;
4782         front->rb_multisample = tmp;
4783
4784         tmp = back->rb_resolved;
4785         back->rb_resolved = front->rb_resolved;
4786         front->rb_resolved = tmp;
4787
4788         resource_unload(&back->resource);
4789         resource_unload(&front->resource);
4790     }
4791
4792     {
4793         DWORD tmp_flags = back->flags;
4794         back->flags = front->flags;
4795         front->flags = tmp_flags;
4796     }
4797 }
4798
4799 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4800  * pixel copy calls. */
4801 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4802         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4803 {
4804     struct wined3d_device *device = dst_surface->resource.device;
4805     float xrel, yrel;
4806     UINT row;
4807     struct wined3d_context *context;
4808     BOOL upsidedown = FALSE;
4809     RECT dst_rect = *dst_rect_in;
4810
4811     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4812      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4813      */
4814     if(dst_rect.top > dst_rect.bottom) {
4815         UINT tmp = dst_rect.bottom;
4816         dst_rect.bottom = dst_rect.top;
4817         dst_rect.top = tmp;
4818         upsidedown = TRUE;
4819     }
4820
4821     context = context_acquire(device, src_surface);
4822     context_apply_blit_state(context, device);
4823     surface_internal_preload(dst_surface, SRGB_RGB);
4824     ENTER_GL();
4825
4826     /* Bind the target texture */
4827     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4828     if (surface_is_offscreen(src_surface))
4829     {
4830         TRACE("Reading from an offscreen target\n");
4831         upsidedown = !upsidedown;
4832         glReadBuffer(device->offscreenBuffer);
4833     }
4834     else
4835     {
4836         glReadBuffer(surface_get_gl_buffer(src_surface));
4837     }
4838     checkGLcall("glReadBuffer");
4839
4840     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4841     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4842
4843     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4844     {
4845         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4846
4847         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4848             ERR("Texture filtering not supported in direct blit\n");
4849         }
4850     }
4851     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4852             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4853     {
4854         ERR("Texture filtering not supported in direct blit\n");
4855     }
4856
4857     if (upsidedown
4858             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4859             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4860     {
4861         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4862
4863         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4864                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4865                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4866                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4867     }
4868     else
4869     {
4870         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4871         /* I have to process this row by row to swap the image,
4872          * otherwise it would be upside down, so stretching in y direction
4873          * doesn't cost extra time
4874          *
4875          * However, stretching in x direction can be avoided if not necessary
4876          */
4877         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4878             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4879             {
4880                 /* Well, that stuff works, but it's very slow.
4881                  * find a better way instead
4882                  */
4883                 UINT col;
4884
4885                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4886                 {
4887                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4888                             dst_rect.left + col /* x offset */, row /* y offset */,
4889                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4890                 }
4891             }
4892             else
4893             {
4894                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4895                         dst_rect.left /* x offset */, row /* y offset */,
4896                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4897             }
4898         }
4899     }
4900     checkGLcall("glCopyTexSubImage2D");
4901
4902     LEAVE_GL();
4903     context_release(context);
4904
4905     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4906      * path is never entered
4907      */
4908     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4909 }
4910
4911 /* Uses the hardware to stretch and flip the image */
4912 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4913         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4914 {
4915     struct wined3d_device *device = dst_surface->resource.device;
4916     struct wined3d_swapchain *src_swapchain = NULL;
4917     GLuint src, backup = 0;
4918     float left, right, top, bottom; /* Texture coordinates */
4919     UINT fbwidth = src_surface->resource.width;
4920     UINT fbheight = src_surface->resource.height;
4921     struct wined3d_context *context;
4922     GLenum drawBuffer = GL_BACK;
4923     GLenum texture_target;
4924     BOOL noBackBufferBackup;
4925     BOOL src_offscreen;
4926     BOOL upsidedown = FALSE;
4927     RECT dst_rect = *dst_rect_in;
4928
4929     TRACE("Using hwstretch blit\n");
4930     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4931     context = context_acquire(device, src_surface);
4932     context_apply_blit_state(context, device);
4933     surface_internal_preload(dst_surface, SRGB_RGB);
4934
4935     src_offscreen = surface_is_offscreen(src_surface);
4936     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4937     if (!noBackBufferBackup && !src_surface->texture_name)
4938     {
4939         /* Get it a description */
4940         surface_internal_preload(src_surface, SRGB_RGB);
4941     }
4942     ENTER_GL();
4943
4944     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4945      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4946      */
4947     if (context->aux_buffers >= 2)
4948     {
4949         /* Got more than one aux buffer? Use the 2nd aux buffer */
4950         drawBuffer = GL_AUX1;
4951     }
4952     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4953     {
4954         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4955         drawBuffer = GL_AUX0;
4956     }
4957
4958     if(noBackBufferBackup) {
4959         glGenTextures(1, &backup);
4960         checkGLcall("glGenTextures");
4961         context_bind_texture(context, GL_TEXTURE_2D, backup);
4962         texture_target = GL_TEXTURE_2D;
4963     } else {
4964         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4965          * we are reading from the back buffer, the backup can be used as source texture
4966          */
4967         texture_target = src_surface->texture_target;
4968         context_bind_texture(context, texture_target, src_surface->texture_name);
4969         glEnable(texture_target);
4970         checkGLcall("glEnable(texture_target)");
4971
4972         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4973         src_surface->flags &= ~SFLAG_INTEXTURE;
4974     }
4975
4976     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4977      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4978      */
4979     if(dst_rect.top > dst_rect.bottom) {
4980         UINT tmp = dst_rect.bottom;
4981         dst_rect.bottom = dst_rect.top;
4982         dst_rect.top = tmp;
4983         upsidedown = TRUE;
4984     }
4985
4986     if (src_offscreen)
4987     {
4988         TRACE("Reading from an offscreen target\n");
4989         upsidedown = !upsidedown;
4990         glReadBuffer(device->offscreenBuffer);
4991     }
4992     else
4993     {
4994         glReadBuffer(surface_get_gl_buffer(src_surface));
4995     }
4996
4997     /* TODO: Only back up the part that will be overwritten */
4998     glCopyTexSubImage2D(texture_target, 0,
4999                         0, 0 /* read offsets */,
5000                         0, 0,
5001                         fbwidth,
5002                         fbheight);
5003
5004     checkGLcall("glCopyTexSubImage2D");
5005
5006     /* No issue with overriding these - the sampler is dirty due to blit usage */
5007     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5008             wined3d_gl_mag_filter(magLookup, Filter));
5009     checkGLcall("glTexParameteri");
5010     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5011             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5012     checkGLcall("glTexParameteri");
5013
5014     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5015         src_swapchain = src_surface->container.u.swapchain;
5016     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5017     {
5018         src = backup ? backup : src_surface->texture_name;
5019     }
5020     else
5021     {
5022         glReadBuffer(GL_FRONT);
5023         checkGLcall("glReadBuffer(GL_FRONT)");
5024
5025         glGenTextures(1, &src);
5026         checkGLcall("glGenTextures(1, &src)");
5027         context_bind_texture(context, GL_TEXTURE_2D, src);
5028
5029         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5030          * out for power of 2 sizes
5031          */
5032         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5033                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5034         checkGLcall("glTexImage2D");
5035         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5036                             0, 0 /* read offsets */,
5037                             0, 0,
5038                             fbwidth,
5039                             fbheight);
5040
5041         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5042         checkGLcall("glTexParameteri");
5043         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5044         checkGLcall("glTexParameteri");
5045
5046         glReadBuffer(GL_BACK);
5047         checkGLcall("glReadBuffer(GL_BACK)");
5048
5049         if(texture_target != GL_TEXTURE_2D) {
5050             glDisable(texture_target);
5051             glEnable(GL_TEXTURE_2D);
5052             texture_target = GL_TEXTURE_2D;
5053         }
5054     }
5055     checkGLcall("glEnd and previous");
5056
5057     left = src_rect->left;
5058     right = src_rect->right;
5059
5060     if (!upsidedown)
5061     {
5062         top = src_surface->resource.height - src_rect->top;
5063         bottom = src_surface->resource.height - src_rect->bottom;
5064     }
5065     else
5066     {
5067         top = src_surface->resource.height - src_rect->bottom;
5068         bottom = src_surface->resource.height - src_rect->top;
5069     }
5070
5071     if (src_surface->flags & SFLAG_NORMCOORD)
5072     {
5073         left /= src_surface->pow2Width;
5074         right /= src_surface->pow2Width;
5075         top /= src_surface->pow2Height;
5076         bottom /= src_surface->pow2Height;
5077     }
5078
5079     /* draw the source texture stretched and upside down. The correct surface is bound already */
5080     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5081     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5082
5083     context_set_draw_buffer(context, drawBuffer);
5084     glReadBuffer(drawBuffer);
5085
5086     glBegin(GL_QUADS);
5087         /* bottom left */
5088         glTexCoord2f(left, bottom);
5089         glVertex2i(0, 0);
5090
5091         /* top left */
5092         glTexCoord2f(left, top);
5093         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5094
5095         /* top right */
5096         glTexCoord2f(right, top);
5097         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5098
5099         /* bottom right */
5100         glTexCoord2f(right, bottom);
5101         glVertex2i(dst_rect.right - dst_rect.left, 0);
5102     glEnd();
5103     checkGLcall("glEnd and previous");
5104
5105     if (texture_target != dst_surface->texture_target)
5106     {
5107         glDisable(texture_target);
5108         glEnable(dst_surface->texture_target);
5109         texture_target = dst_surface->texture_target;
5110     }
5111
5112     /* Now read the stretched and upside down image into the destination texture */
5113     context_bind_texture(context, texture_target, dst_surface->texture_name);
5114     glCopyTexSubImage2D(texture_target,
5115                         0,
5116                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5117                         0, 0, /* We blitted the image to the origin */
5118                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5119     checkGLcall("glCopyTexSubImage2D");
5120
5121     if(drawBuffer == GL_BACK) {
5122         /* Write the back buffer backup back */
5123         if(backup) {
5124             if(texture_target != GL_TEXTURE_2D) {
5125                 glDisable(texture_target);
5126                 glEnable(GL_TEXTURE_2D);
5127                 texture_target = GL_TEXTURE_2D;
5128             }
5129             context_bind_texture(context, GL_TEXTURE_2D, backup);
5130         }
5131         else
5132         {
5133             if (texture_target != src_surface->texture_target)
5134             {
5135                 glDisable(texture_target);
5136                 glEnable(src_surface->texture_target);
5137                 texture_target = src_surface->texture_target;
5138             }
5139             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5140         }
5141
5142         glBegin(GL_QUADS);
5143             /* top left */
5144             glTexCoord2f(0.0f, 0.0f);
5145             glVertex2i(0, fbheight);
5146
5147             /* bottom left */
5148             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5149             glVertex2i(0, 0);
5150
5151             /* bottom right */
5152             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5153                     (float)fbheight / (float)src_surface->pow2Height);
5154             glVertex2i(fbwidth, 0);
5155
5156             /* top right */
5157             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5158             glVertex2i(fbwidth, fbheight);
5159         glEnd();
5160     }
5161     glDisable(texture_target);
5162     checkGLcall("glDisable(texture_target)");
5163
5164     /* Cleanup */
5165     if (src != src_surface->texture_name && src != backup)
5166     {
5167         glDeleteTextures(1, &src);
5168         checkGLcall("glDeleteTextures(1, &src)");
5169     }
5170     if(backup) {
5171         glDeleteTextures(1, &backup);
5172         checkGLcall("glDeleteTextures(1, &backup)");
5173     }
5174
5175     LEAVE_GL();
5176
5177     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5178
5179     context_release(context);
5180
5181     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5182      * path is never entered
5183      */
5184     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5185 }
5186
5187 /* Front buffer coordinates are always full screen coordinates, but our GL
5188  * drawable is limited to the window's client area. The sysmem and texture
5189  * copies do have the full screen size. Note that GL has a bottom-left
5190  * origin, while D3D has a top-left origin. */
5191 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5192 {
5193     UINT drawable_height;
5194
5195     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5196             && surface == surface->container.u.swapchain->front_buffer)
5197     {
5198         POINT offset = {0, 0};
5199         RECT windowsize;
5200
5201         ScreenToClient(window, &offset);
5202         OffsetRect(rect, offset.x, offset.y);
5203
5204         GetClientRect(window, &windowsize);
5205         drawable_height = windowsize.bottom - windowsize.top;
5206     }
5207     else
5208     {
5209         drawable_height = surface->resource.height;
5210     }
5211
5212     rect->top = drawable_height - rect->top;
5213     rect->bottom = drawable_height - rect->bottom;
5214 }
5215
5216 static void surface_blt_to_drawable(struct wined3d_device *device,
5217         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5218         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5219         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5220 {
5221     struct wined3d_context *context;
5222     RECT src_rect, dst_rect;
5223
5224     src_rect = *src_rect_in;
5225     dst_rect = *dst_rect_in;
5226
5227     /* Make sure the surface is up-to-date. This should probably use
5228      * surface_load_location() and worry about the destination surface too,
5229      * unless we're overwriting it completely. */
5230     surface_internal_preload(src_surface, SRGB_RGB);
5231
5232     /* Activate the destination context, set it up for blitting */
5233     context = context_acquire(device, dst_surface);
5234     context_apply_blit_state(context, device);
5235
5236     if (!surface_is_offscreen(dst_surface))
5237         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5238
5239     device->blitter->set_shader(device->blit_priv, context, src_surface);
5240
5241     ENTER_GL();
5242
5243     if (color_key)
5244     {
5245         glEnable(GL_ALPHA_TEST);
5246         checkGLcall("glEnable(GL_ALPHA_TEST)");
5247
5248         /* When the primary render target uses P8, the alpha component
5249          * contains the palette index. Which means that the colorkey is one of
5250          * the palette entries. In other cases pixels that should be masked
5251          * away have alpha set to 0. */
5252         if (primary_render_target_is_p8(device))
5253             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5254         else
5255             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5256         checkGLcall("glAlphaFunc");
5257     }
5258     else
5259     {
5260         glDisable(GL_ALPHA_TEST);
5261         checkGLcall("glDisable(GL_ALPHA_TEST)");
5262     }
5263
5264     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5265
5266     if (color_key)
5267     {
5268         glDisable(GL_ALPHA_TEST);
5269         checkGLcall("glDisable(GL_ALPHA_TEST)");
5270     }
5271
5272     LEAVE_GL();
5273
5274     /* Leave the opengl state valid for blitting */
5275     device->blitter->unset_shader(context->gl_info);
5276
5277     if (wined3d_settings.strict_draw_ordering
5278             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5279             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5280         wglFlush(); /* Flush to ensure ordering across contexts. */
5281
5282     context_release(context);
5283 }
5284
5285 /* Do not call while under the GL lock. */
5286 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5287 {
5288     struct wined3d_device *device = s->resource.device;
5289     const struct blit_shader *blitter;
5290
5291     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5292             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5293     if (!blitter)
5294     {
5295         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5296         return WINED3DERR_INVALIDCALL;
5297     }
5298
5299     return blitter->color_fill(device, s, rect, color);
5300 }
5301
5302 /* Do not call while under the GL lock. */
5303 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5304         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5305         WINED3DTEXTUREFILTERTYPE Filter)
5306 {
5307     struct wined3d_device *device = dst_surface->resource.device;
5308     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5309     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5310
5311     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5312             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5313             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5314
5315     /* Get the swapchain. One of the surfaces has to be a primary surface */
5316     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5317     {
5318         WARN("Destination is in sysmem, rejecting gl blt\n");
5319         return WINED3DERR_INVALIDCALL;
5320     }
5321
5322     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5323         dstSwapchain = dst_surface->container.u.swapchain;
5324
5325     if (src_surface)
5326     {
5327         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5328         {
5329             WARN("Src is in sysmem, rejecting gl blt\n");
5330             return WINED3DERR_INVALIDCALL;
5331         }
5332
5333         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5334             srcSwapchain = src_surface->container.u.swapchain;
5335     }
5336
5337     /* Early sort out of cases where no render target is used */
5338     if (!dstSwapchain && !srcSwapchain
5339             && src_surface != device->fb.render_targets[0]
5340             && dst_surface != device->fb.render_targets[0])
5341     {
5342         TRACE("No surface is render target, not using hardware blit.\n");
5343         return WINED3DERR_INVALIDCALL;
5344     }
5345
5346     /* No destination color keying supported */
5347     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5348     {
5349         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5350         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5351         return WINED3DERR_INVALIDCALL;
5352     }
5353
5354     if (dstSwapchain && dstSwapchain == srcSwapchain)
5355     {
5356         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5357         return WINED3DERR_INVALIDCALL;
5358     }
5359
5360     if (dstSwapchain && srcSwapchain)
5361     {
5362         FIXME("Implement hardware blit between two different swapchains\n");
5363         return WINED3DERR_INVALIDCALL;
5364     }
5365
5366     if (dstSwapchain)
5367     {
5368         /* Handled with regular texture -> swapchain blit */
5369         if (src_surface == device->fb.render_targets[0])
5370             TRACE("Blit from active render target to a swapchain\n");
5371     }
5372     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5373     {
5374         FIXME("Implement blit from a swapchain to the active render target\n");
5375         return WINED3DERR_INVALIDCALL;
5376     }
5377
5378     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5379     {
5380         /* Blit from render target to texture */
5381         BOOL stretchx;
5382
5383         /* P8 read back is not implemented */
5384         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5385                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5386         {
5387             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5388             return WINED3DERR_INVALIDCALL;
5389         }
5390
5391         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5392         {
5393             TRACE("Color keying not supported by frame buffer to texture blit\n");
5394             return WINED3DERR_INVALIDCALL;
5395             /* Destination color key is checked above */
5396         }
5397
5398         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5399             stretchx = TRUE;
5400         else
5401             stretchx = FALSE;
5402
5403         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5404          * flip the image nor scale it.
5405          *
5406          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5407          * -> If the app wants a image width an unscaled width, copy it line per line
5408          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5409          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5410          *    back buffer. This is slower than reading line per line, thus not used for flipping
5411          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5412          *    pixel by pixel. */
5413         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5414                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5415         {
5416             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5417             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5418         } else {
5419             TRACE("Using hardware stretching to flip / stretch the texture\n");
5420             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5421         }
5422
5423         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5424         {
5425             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5426             dst_surface->resource.allocatedMemory = NULL;
5427             dst_surface->resource.heapMemory = NULL;
5428         }
5429         else
5430         {
5431             dst_surface->flags &= ~SFLAG_INSYSMEM;
5432         }
5433
5434         return WINED3D_OK;
5435     }
5436     else if (src_surface)
5437     {
5438         /* Blit from offscreen surface to render target */
5439         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5440         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5441
5442         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5443
5444         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5445                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5446                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5447         {
5448             FIXME("Unsupported blit operation falling back to software\n");
5449             return WINED3DERR_INVALIDCALL;
5450         }
5451
5452         /* Color keying: Check if we have to do a color keyed blt,
5453          * and if not check if a color key is activated.
5454          *
5455          * Just modify the color keying parameters in the surface and restore them afterwards
5456          * The surface keeps track of the color key last used to load the opengl surface.
5457          * PreLoad will catch the change to the flags and color key and reload if necessary.
5458          */
5459         if (flags & WINEDDBLT_KEYSRC)
5460         {
5461             /* Use color key from surface */
5462         }
5463         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5464         {
5465             /* Use color key from DDBltFx */
5466             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5467             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5468         }
5469         else
5470         {
5471             /* Do not use color key */
5472             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5473         }
5474
5475         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5476                 src_surface, src_rect, dst_surface, dst_rect);
5477
5478         /* Restore the color key parameters */
5479         src_surface->CKeyFlags = oldCKeyFlags;
5480         src_surface->SrcBltCKey = oldBltCKey;
5481
5482         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5483
5484         return WINED3D_OK;
5485     }
5486
5487     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5488     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5489     return WINED3DERR_INVALIDCALL;
5490 }
5491
5492 /* GL locking is done by the caller */
5493 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5494         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5495 {
5496     struct wined3d_device *device = surface->resource.device;
5497     const struct wined3d_gl_info *gl_info = context->gl_info;
5498     GLint compare_mode = GL_NONE;
5499     struct blt_info info;
5500     GLint old_binding = 0;
5501     RECT rect;
5502
5503     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5504
5505     glDisable(GL_CULL_FACE);
5506     glDisable(GL_BLEND);
5507     glDisable(GL_ALPHA_TEST);
5508     glDisable(GL_SCISSOR_TEST);
5509     glDisable(GL_STENCIL_TEST);
5510     glEnable(GL_DEPTH_TEST);
5511     glDepthFunc(GL_ALWAYS);
5512     glDepthMask(GL_TRUE);
5513     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5514     glViewport(x, y, w, h);
5515
5516     SetRect(&rect, 0, h, w, 0);
5517     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5518     context_active_texture(context, context->gl_info, 0);
5519     glGetIntegerv(info.binding, &old_binding);
5520     glBindTexture(info.bind_target, texture);
5521     if (gl_info->supported[ARB_SHADOW])
5522     {
5523         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5524         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5525     }
5526
5527     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5528             gl_info, info.tex_type, &surface->ds_current_size);
5529
5530     glBegin(GL_TRIANGLE_STRIP);
5531     glTexCoord3fv(info.coords[0]);
5532     glVertex2f(-1.0f, -1.0f);
5533     glTexCoord3fv(info.coords[1]);
5534     glVertex2f(1.0f, -1.0f);
5535     glTexCoord3fv(info.coords[2]);
5536     glVertex2f(-1.0f, 1.0f);
5537     glTexCoord3fv(info.coords[3]);
5538     glVertex2f(1.0f, 1.0f);
5539     glEnd();
5540
5541     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5542     glBindTexture(info.bind_target, old_binding);
5543
5544     glPopAttrib();
5545
5546     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5547 }
5548
5549 void surface_modify_ds_location(struct wined3d_surface *surface,
5550         DWORD location, UINT w, UINT h)
5551 {
5552     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5553
5554     if (location & ~SFLAG_DS_LOCATIONS)
5555         FIXME("Invalid location (%#x) specified.\n", location);
5556
5557     surface->ds_current_size.cx = w;
5558     surface->ds_current_size.cy = h;
5559     surface->flags &= ~SFLAG_DS_LOCATIONS;
5560     surface->flags |= location;
5561 }
5562
5563 /* Context activation is done by the caller. */
5564 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5565 {
5566     struct wined3d_device *device = surface->resource.device;
5567     GLsizei w, h;
5568
5569     TRACE("surface %p, new location %#x.\n", surface, location);
5570
5571     /* TODO: Make this work for modes other than FBO */
5572     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5573
5574     if (!(surface->flags & location))
5575     {
5576         w = surface->ds_current_size.cx;
5577         h = surface->ds_current_size.cy;
5578         surface->ds_current_size.cx = 0;
5579         surface->ds_current_size.cy = 0;
5580     }
5581     else
5582     {
5583         w = surface->resource.width;
5584         h = surface->resource.height;
5585     }
5586
5587     if (surface->ds_current_size.cx == surface->resource.width
5588             && surface->ds_current_size.cy == surface->resource.height)
5589     {
5590         TRACE("Location (%#x) is already up to date.\n", location);
5591         return;
5592     }
5593
5594     if (surface->current_renderbuffer)
5595     {
5596         FIXME("Not supported with fixed up depth stencil.\n");
5597         return;
5598     }
5599
5600     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5601     {
5602         /* This mostly happens when a depth / stencil is used without being
5603          * cleared first. In principle we could upload from sysmem, or
5604          * explicitly clear before first usage. For the moment there don't
5605          * appear to be a lot of applications depending on this, so a FIXME
5606          * should do. */
5607         FIXME("No up to date depth stencil location.\n");
5608         surface->flags |= location;
5609         surface->ds_current_size.cx = surface->resource.width;
5610         surface->ds_current_size.cy = surface->resource.height;
5611         return;
5612     }
5613
5614     if (location == SFLAG_DS_OFFSCREEN)
5615     {
5616         GLint old_binding = 0;
5617         GLenum bind_target;
5618
5619         /* The render target is allowed to be smaller than the depth/stencil
5620          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5621          * than the offscreen surface. Don't overwrite the offscreen surface
5622          * with undefined data. */
5623         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5624         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5625
5626         TRACE("Copying onscreen depth buffer to depth texture.\n");
5627
5628         ENTER_GL();
5629
5630         if (!device->depth_blt_texture)
5631         {
5632             glGenTextures(1, &device->depth_blt_texture);
5633         }
5634
5635         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5636          * directly on the FBO texture. That's because we need to flip. */
5637         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5638                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5639         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5640         {
5641             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5642             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5643         }
5644         else
5645         {
5646             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5647             bind_target = GL_TEXTURE_2D;
5648         }
5649         glBindTexture(bind_target, device->depth_blt_texture);
5650         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5651          * internal format, because the internal format might include stencil
5652          * data. In principle we should copy stencil data as well, but unless
5653          * the driver supports stencil export it's hard to do, and doesn't
5654          * seem to be needed in practice. If the hardware doesn't support
5655          * writing stencil data, the glCopyTexImage2D() call might trigger
5656          * software fallbacks. */
5657         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5658         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5659         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5660         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5661         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5662         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5663         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5664         glBindTexture(bind_target, old_binding);
5665
5666         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5667                 NULL, surface, SFLAG_INTEXTURE);
5668         context_set_draw_buffer(context, GL_NONE);
5669         glReadBuffer(GL_NONE);
5670
5671         /* Do the actual blit */
5672         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5673         checkGLcall("depth_blt");
5674
5675         context_invalidate_state(context, STATE_FRAMEBUFFER);
5676
5677         LEAVE_GL();
5678
5679         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5680     }
5681     else if (location == SFLAG_DS_ONSCREEN)
5682     {
5683         TRACE("Copying depth texture to onscreen depth buffer.\n");
5684
5685         ENTER_GL();
5686
5687         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5688                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5689         surface_depth_blt(surface, context, surface->texture_name,
5690                 0, surface->pow2Height - h, w, h, surface->texture_target);
5691         checkGLcall("depth_blt");
5692
5693         context_invalidate_state(context, STATE_FRAMEBUFFER);
5694
5695         LEAVE_GL();
5696
5697         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5698     }
5699     else
5700     {
5701         ERR("Invalid location (%#x) specified.\n", location);
5702     }
5703
5704     surface->flags |= location;
5705     surface->ds_current_size.cx = surface->resource.width;
5706     surface->ds_current_size.cy = surface->resource.height;
5707 }
5708
5709 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5710 {
5711     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5712     struct wined3d_surface *overlay;
5713
5714     TRACE("surface %p, location %s, persistent %#x.\n",
5715             surface, debug_surflocation(location), persistent);
5716
5717     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5718             && (location & SFLAG_INDRAWABLE))
5719         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5720
5721     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5722             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5723         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5724
5725     if (persistent)
5726     {
5727         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5728                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5729         {
5730             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5731             {
5732                 TRACE("Passing to container.\n");
5733                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5734             }
5735         }
5736         surface->flags &= ~SFLAG_LOCATIONS;
5737         surface->flags |= location;
5738
5739         /* Redraw emulated overlays, if any */
5740         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5741         {
5742             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5743             {
5744                 overlay->surface_ops->surface_draw_overlay(overlay);
5745             }
5746         }
5747     }
5748     else
5749     {
5750         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5751         {
5752             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5753             {
5754                 TRACE("Passing to container\n");
5755                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5756             }
5757         }
5758         surface->flags &= ~location;
5759     }
5760
5761     if (!(surface->flags & SFLAG_LOCATIONS))
5762     {
5763         ERR("Surface %p does not have any up to date location.\n", surface);
5764     }
5765 }
5766
5767 static DWORD resource_access_from_location(DWORD location)
5768 {
5769     switch (location)
5770     {
5771         case SFLAG_INSYSMEM:
5772             return WINED3D_RESOURCE_ACCESS_CPU;
5773
5774         case SFLAG_INDRAWABLE:
5775         case SFLAG_INSRGBTEX:
5776         case SFLAG_INTEXTURE:
5777         case SFLAG_INRB_MULTISAMPLE:
5778         case SFLAG_INRB_RESOLVED:
5779             return WINED3D_RESOURCE_ACCESS_GPU;
5780
5781         default:
5782             FIXME("Unhandled location %#x.\n", location);
5783             return 0;
5784     }
5785 }
5786
5787 static void surface_load_sysmem(struct wined3d_surface *surface,
5788         const struct wined3d_gl_info *gl_info, const RECT *rect)
5789 {
5790     surface_prepare_system_memory(surface);
5791
5792     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5793         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5794
5795     /* Download the surface to system memory. */
5796     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5797     {
5798         struct wined3d_device *device = surface->resource.device;
5799         struct wined3d_context *context;
5800
5801         /* TODO: Use already acquired context when possible. */
5802         context = context_acquire(device, NULL);
5803
5804         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5805         surface_download_data(surface, gl_info);
5806
5807         context_release(context);
5808
5809         return;
5810     }
5811
5812     /* Note: It might be faster to download into a texture first. */
5813     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5814             wined3d_surface_get_pitch(surface));
5815 }
5816
5817 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5818         const struct wined3d_gl_info *gl_info, const RECT *rect)
5819 {
5820     struct wined3d_device *device = surface->resource.device;
5821     struct wined3d_format format;
5822     CONVERT_TYPES convert;
5823     UINT byte_count;
5824     BYTE *mem;
5825
5826     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5827     {
5828         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5829         return WINED3DERR_INVALIDCALL;
5830     }
5831
5832     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5833         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5834
5835     if (surface->flags & SFLAG_INTEXTURE)
5836     {
5837         RECT r;
5838
5839         surface_get_rect(surface, rect, &r);
5840         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5841
5842         return WINED3D_OK;
5843     }
5844
5845     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5846     {
5847         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5848          * path through sysmem. */
5849         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5850     }
5851
5852     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5853
5854     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5855      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5856      * called. */
5857     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5858     {
5859         struct wined3d_context *context;
5860
5861         TRACE("Removing the pbo attached to surface %p.\n", surface);
5862
5863         /* TODO: Use already acquired context when possible. */
5864         context = context_acquire(device, NULL);
5865
5866         surface_remove_pbo(surface, gl_info);
5867
5868         context_release(context);
5869     }
5870
5871     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5872     {
5873         UINT height = surface->resource.height;
5874         UINT width = surface->resource.width;
5875         UINT src_pitch, dst_pitch;
5876
5877         byte_count = format.conv_byte_count;
5878         src_pitch = wined3d_surface_get_pitch(surface);
5879
5880         /* Stick to the alignment for the converted surface too, makes it
5881          * easier to load the surface. */
5882         dst_pitch = width * byte_count;
5883         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5884
5885         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5886         {
5887             ERR("Out of memory (%u).\n", dst_pitch * height);
5888             return E_OUTOFMEMORY;
5889         }
5890
5891         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5892                 src_pitch, width, height, dst_pitch, convert, surface);
5893
5894         surface->flags |= SFLAG_CONVERTED;
5895     }
5896     else
5897     {
5898         surface->flags &= ~SFLAG_CONVERTED;
5899         mem = surface->resource.allocatedMemory;
5900         byte_count = format.byte_count;
5901     }
5902
5903     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5904
5905     /* Don't delete PBO memory. */
5906     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5907         HeapFree(GetProcessHeap(), 0, mem);
5908
5909     return WINED3D_OK;
5910 }
5911
5912 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5913         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5914 {
5915     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
5916     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5917     struct wined3d_device *device = surface->resource.device;
5918     struct wined3d_context *context;
5919     UINT width, src_pitch, dst_pitch;
5920     struct wined3d_bo_address data;
5921     struct wined3d_format format;
5922     POINT dst_point = {0, 0};
5923     CONVERT_TYPES convert;
5924     BYTE *mem;
5925
5926     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5927             && surface_is_offscreen(surface)
5928             && (surface->flags & SFLAG_INDRAWABLE))
5929     {
5930         read_from_framebuffer_texture(surface, srgb);
5931
5932         return WINED3D_OK;
5933     }
5934
5935     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5936             && (surface->resource.format->flags & attach_flags) == attach_flags
5937             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5938                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5939                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5940     {
5941         if (srgb)
5942             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5943                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5944         else
5945             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5946                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5947
5948         return WINED3D_OK;
5949     }
5950
5951     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
5952             && (surface->resource.format->flags & attach_flags) == attach_flags
5953             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5954                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5955                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5956     {
5957         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
5958         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
5959         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
5960
5961         surface_blt_fbo(device, WINED3DTEXF_POINT, surface, src_location,
5962                 &rect, surface, dst_location, &rect);
5963
5964         return WINED3D_OK;
5965     }
5966
5967     /* Upload from system memory */
5968
5969     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
5970             TRUE /* We will use textures */, &format, &convert);
5971
5972     if (srgb)
5973     {
5974         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
5975         {
5976             /* Performance warning... */
5977             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
5978             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5979         }
5980     }
5981     else
5982     {
5983         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
5984         {
5985             /* Performance warning... */
5986             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
5987             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5988         }
5989     }
5990
5991     if (!(surface->flags & SFLAG_INSYSMEM))
5992     {
5993         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
5994         /* Lets hope we get it from somewhere... */
5995         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5996     }
5997
5998     /* TODO: Use already acquired context when possible. */
5999     context = context_acquire(device, NULL);
6000
6001     surface_prepare_texture(surface, context, srgb);
6002     surface_bind_and_dirtify(surface, context, srgb);
6003
6004     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6005     {
6006         surface->flags |= SFLAG_GLCKEY;
6007         surface->glCKey = surface->SrcBltCKey;
6008     }
6009     else surface->flags &= ~SFLAG_GLCKEY;
6010
6011     width = surface->resource.width;
6012     src_pitch = wined3d_surface_get_pitch(surface);
6013
6014     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6015      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6016      * called. */
6017     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6018     {
6019         TRACE("Removing the pbo attached to surface %p.\n", surface);
6020         surface_remove_pbo(surface, gl_info);
6021     }
6022
6023     if (format.convert)
6024     {
6025         /* This code is entered for texture formats which need a fixup. */
6026         UINT height = surface->resource.height;
6027
6028         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6029         dst_pitch = width * format.conv_byte_count;
6030         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6031
6032         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6033         {
6034             ERR("Out of memory (%u).\n", dst_pitch * height);
6035             context_release(context);
6036             return E_OUTOFMEMORY;
6037         }
6038         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6039     }
6040     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6041     {
6042         /* This code is only entered for color keying fixups */
6043         UINT height = surface->resource.height;
6044
6045         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6046         dst_pitch = width * format.conv_byte_count;
6047         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6048
6049         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6050         {
6051             ERR("Out of memory (%u).\n", dst_pitch * height);
6052             context_release(context);
6053             return E_OUTOFMEMORY;
6054         }
6055         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6056                 width, height, dst_pitch, convert, surface);
6057     }
6058     else
6059     {
6060         mem = surface->resource.allocatedMemory;
6061     }
6062
6063     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6064     data.addr = mem;
6065     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6066
6067     context_release(context);
6068
6069     /* Don't delete PBO memory. */
6070     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6071         HeapFree(GetProcessHeap(), 0, mem);
6072
6073     return WINED3D_OK;
6074 }
6075
6076 static void surface_multisample_resolve(struct wined3d_surface *surface)
6077 {
6078     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6079
6080     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6081         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6082
6083     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6084             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6085 }
6086
6087 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6088 {
6089     struct wined3d_device *device = surface->resource.device;
6090     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6091     HRESULT hr;
6092
6093     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6094
6095     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6096     {
6097         if (location == SFLAG_INTEXTURE)
6098         {
6099             struct wined3d_context *context = context_acquire(device, NULL);
6100             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6101             context_release(context);
6102             return WINED3D_OK;
6103         }
6104         else
6105         {
6106             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6107             return WINED3DERR_INVALIDCALL;
6108         }
6109     }
6110
6111     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6112         location = SFLAG_INTEXTURE;
6113
6114     if (surface->flags & location)
6115     {
6116         TRACE("Location already up to date.\n");
6117         return WINED3D_OK;
6118     }
6119
6120     if (WARN_ON(d3d_surface))
6121     {
6122         DWORD required_access = resource_access_from_location(location);
6123         if ((surface->resource.access_flags & required_access) != required_access)
6124             WARN("Operation requires %#x access, but surface only has %#x.\n",
6125                     required_access, surface->resource.access_flags);
6126     }
6127
6128     if (!(surface->flags & SFLAG_LOCATIONS))
6129     {
6130         ERR("Surface %p does not have any up to date location.\n", surface);
6131         surface->flags |= SFLAG_LOST;
6132         return WINED3DERR_DEVICELOST;
6133     }
6134
6135     switch (location)
6136     {
6137         case SFLAG_INSYSMEM:
6138             surface_load_sysmem(surface, gl_info, rect);
6139             break;
6140
6141         case SFLAG_INDRAWABLE:
6142             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6143                 return hr;
6144             break;
6145
6146         case SFLAG_INRB_RESOLVED:
6147             surface_multisample_resolve(surface);
6148             break;
6149
6150         case SFLAG_INTEXTURE:
6151         case SFLAG_INSRGBTEX:
6152             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6153                 return hr;
6154             break;
6155
6156         default:
6157             ERR("Don't know how to handle location %#x.\n", location);
6158             break;
6159     }
6160
6161     if (!rect)
6162     {
6163         surface->flags |= location;
6164
6165         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6166             surface_evict_sysmem(surface);
6167     }
6168
6169     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6170             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6171     {
6172         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6173     }
6174
6175     return WINED3D_OK;
6176 }
6177
6178 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6179 {
6180     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6181
6182     /* Not on a swapchain - must be offscreen */
6183     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6184
6185     /* The front buffer is always onscreen */
6186     if (surface == swapchain->front_buffer) return FALSE;
6187
6188     /* If the swapchain is rendered to an FBO, the backbuffer is
6189      * offscreen, otherwise onscreen */
6190     return swapchain->render_to_fbo;
6191 }
6192
6193 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6196
6197 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6198 /* Context activation is done by the caller. */
6199 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6200 {
6201     BYTE table[256][4];
6202     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6203
6204     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6205
6206     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6207     ENTER_GL();
6208     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6209     LEAVE_GL();
6210 }
6211
6212 /* Context activation is done by the caller. */
6213 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6214 {
6215     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6216
6217     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6218      * else the surface is converted in software at upload time in LoadLocation.
6219      */
6220     if(fixup == COMPLEX_FIXUP_P8 && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6221         ffp_blit_p8_upload_palette(surface, context->gl_info);
6222
6223     ENTER_GL();
6224     glEnable(surface->texture_target);
6225     checkGLcall("glEnable(surface->texture_target)");
6226     LEAVE_GL();
6227     return WINED3D_OK;
6228 }
6229
6230 /* Context activation is done by the caller. */
6231 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6232 {
6233     ENTER_GL();
6234     glDisable(GL_TEXTURE_2D);
6235     checkGLcall("glDisable(GL_TEXTURE_2D)");
6236     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6237     {
6238         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6239         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6240     }
6241     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6242     {
6243         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6244         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6245     }
6246     LEAVE_GL();
6247 }
6248
6249 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6250         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6251         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6252 {
6253     enum complex_fixup src_fixup;
6254
6255     switch (blit_op)
6256     {
6257         case WINED3D_BLIT_OP_COLOR_BLIT:
6258             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6259                 return FALSE;
6260
6261             src_fixup = get_complex_fixup(src_format->color_fixup);
6262             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6263             {
6264                 TRACE("Checking support for fixup:\n");
6265                 dump_color_fixup_desc(src_format->color_fixup);
6266             }
6267
6268             if (!is_identity_fixup(dst_format->color_fixup))
6269             {
6270                 TRACE("Destination fixups are not supported\n");
6271                 return FALSE;
6272             }
6273
6274             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6275             {
6276                 TRACE("P8 fixup supported\n");
6277                 return TRUE;
6278             }
6279
6280             /* We only support identity conversions. */
6281             if (is_identity_fixup(src_format->color_fixup))
6282             {
6283                 TRACE("[OK]\n");
6284                 return TRUE;
6285             }
6286
6287             TRACE("[FAILED]\n");
6288             return FALSE;
6289
6290         case WINED3D_BLIT_OP_COLOR_FILL:
6291             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6292                 return FALSE;
6293
6294             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6295             {
6296                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6297                     return FALSE;
6298             }
6299             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6300             {
6301                 TRACE("Color fill not supported\n");
6302                 return FALSE;
6303             }
6304
6305             /* FIXME: We should reject color fills on formats with fixups,
6306              * but this would break P8 color fills for example. */
6307
6308             return TRUE;
6309
6310         case WINED3D_BLIT_OP_DEPTH_FILL:
6311             return TRUE;
6312
6313         default:
6314             TRACE("Unsupported blit_op=%d\n", blit_op);
6315             return FALSE;
6316     }
6317 }
6318
6319 /* Do not call while under the GL lock. */
6320 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6321         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6322 {
6323     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6324     struct wined3d_fb_state fb = {&dst_surface, NULL};
6325
6326     return device_clear_render_targets(device, 1, &fb,
6327             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6328 }
6329
6330 /* Do not call while under the GL lock. */
6331 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6332         struct wined3d_surface *surface, const RECT *rect, float depth)
6333 {
6334     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6335     struct wined3d_fb_state fb = {NULL, surface};
6336
6337     return device_clear_render_targets(device, 0, &fb,
6338             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6339 }
6340
6341 const struct blit_shader ffp_blit =  {
6342     ffp_blit_alloc,
6343     ffp_blit_free,
6344     ffp_blit_set,
6345     ffp_blit_unset,
6346     ffp_blit_supported,
6347     ffp_blit_color_fill,
6348     ffp_blit_depth_fill,
6349 };
6350
6351 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6352 {
6353     return WINED3D_OK;
6354 }
6355
/* Release hook of the CPU blitter; intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Nothing to do. */
    return;
}
6360
6361 /* Context activation is done by the caller. */
6362 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6363 {
6364     return WINED3D_OK;
6365 }
6366
/* Teardown hook of the CPU blitter; intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* Nothing to do. */
    return;
}
6371
6372 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6373         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6374         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6375 {
6376     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6377     {
6378         return TRUE;
6379     }
6380
6381     return FALSE;
6382 }
6383
6384 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6385         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6386         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6387 {
6388     UINT row_block_count;
6389     const BYTE *src_row;
6390     BYTE *dst_row;
6391     UINT x, y;
6392
6393     src_row = src_data;
6394     dst_row = dst_data;
6395
6396     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6397
6398     if (!flags)
6399     {
6400         for (y = 0; y < update_h; y += format->block_height)
6401         {
6402             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6403             src_row += src_pitch;
6404             dst_row += dst_pitch;
6405         }
6406
6407         return WINED3D_OK;
6408     }
6409
6410     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6411     {
6412         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6413
6414         switch (format->id)
6415         {
6416             case WINED3DFMT_DXT1:
6417                 for (y = 0; y < update_h; y += format->block_height)
6418                 {
6419                     struct block
6420                     {
6421                         WORD color[2];
6422                         BYTE control_row[4];
6423                     };
6424
6425                     const struct block *s = (const struct block *)src_row;
6426                     struct block *d = (struct block *)dst_row;
6427
6428                     for (x = 0; x < row_block_count; ++x)
6429                     {
6430                         d[x].color[0] = s[x].color[0];
6431                         d[x].color[1] = s[x].color[1];
6432                         d[x].control_row[0] = s[x].control_row[3];
6433                         d[x].control_row[1] = s[x].control_row[2];
6434                         d[x].control_row[2] = s[x].control_row[1];
6435                         d[x].control_row[3] = s[x].control_row[0];
6436                     }
6437                     src_row -= src_pitch;
6438                     dst_row += dst_pitch;
6439                 }
6440                 return WINED3D_OK;
6441
6442             case WINED3DFMT_DXT3:
6443                 for (y = 0; y < update_h; y += format->block_height)
6444                 {
6445                     struct block
6446                     {
6447                         WORD alpha_row[4];
6448                         WORD color[2];
6449                         BYTE control_row[4];
6450                     };
6451
6452                     const struct block *s = (const struct block *)src_row;
6453                     struct block *d = (struct block *)dst_row;
6454
6455                     for (x = 0; x < row_block_count; ++x)
6456                     {
6457                         d[x].alpha_row[0] = s[x].alpha_row[3];
6458                         d[x].alpha_row[1] = s[x].alpha_row[2];
6459                         d[x].alpha_row[2] = s[x].alpha_row[1];
6460                         d[x].alpha_row[3] = s[x].alpha_row[0];
6461                         d[x].color[0] = s[x].color[0];
6462                         d[x].color[1] = s[x].color[1];
6463                         d[x].control_row[0] = s[x].control_row[3];
6464                         d[x].control_row[1] = s[x].control_row[2];
6465                         d[x].control_row[2] = s[x].control_row[1];
6466                         d[x].control_row[3] = s[x].control_row[0];
6467                     }
6468                     src_row -= src_pitch;
6469                     dst_row += dst_pitch;
6470                 }
6471                 return WINED3D_OK;
6472
6473             default:
6474                 FIXME("Compressed flip not implemented for format %s.\n",
6475                         debug_d3dformat(format->id));
6476                 return E_NOTIMPL;
6477         }
6478     }
6479
6480     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6481             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6482
6483     return E_NOTIMPL;
6484 }
6485
6486 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6487         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6488         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6489 {
6490     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6491     const struct wined3d_format *src_format, *dst_format;
6492     struct wined3d_surface *orig_src = src_surface;
6493     WINED3DLOCKED_RECT dlock, slock;
6494     HRESULT hr = WINED3D_OK;
6495     const BYTE *sbuf;
6496     RECT xdst,xsrc;
6497     BYTE *dbuf;
6498     int x, y;
6499
6500     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6501             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6502             flags, fx, debug_d3dtexturefiltertype(filter));
6503
6504     xsrc = *src_rect;
6505
6506     if (!src_surface)
6507     {
6508         RECT full_rect;
6509
6510         full_rect.left = 0;
6511         full_rect.top = 0;
6512         full_rect.right = dst_surface->resource.width;
6513         full_rect.bottom = dst_surface->resource.height;
6514         IntersectRect(&xdst, &full_rect, dst_rect);
6515     }
6516     else
6517     {
6518         BOOL clip_horiz, clip_vert;
6519
6520         xdst = *dst_rect;
6521         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6522         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6523
6524         if (clip_vert || clip_horiz)
6525         {
6526             /* Now check if this is a special case or not... */
6527             if ((flags & WINEDDBLT_DDFX)
6528                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6529                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6530             {
6531                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6532                 return WINED3D_OK;
6533             }
6534
6535             if (clip_horiz)
6536             {
6537                 if (xdst.left < 0)
6538                 {
6539                     xsrc.left -= xdst.left;
6540                     xdst.left = 0;
6541                 }
6542                 if (xdst.right > dst_surface->resource.width)
6543                 {
6544                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6545                     xdst.right = (int)dst_surface->resource.width;
6546                 }
6547             }
6548
6549             if (clip_vert)
6550             {
6551                 if (xdst.top < 0)
6552                 {
6553                     xsrc.top -= xdst.top;
6554                     xdst.top = 0;
6555                 }
6556                 if (xdst.bottom > dst_surface->resource.height)
6557                 {
6558                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6559                     xdst.bottom = (int)dst_surface->resource.height;
6560                 }
6561             }
6562
6563             /* And check if after clipping something is still to be done... */
6564             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6565                     || (xdst.left >= (int)dst_surface->resource.width)
6566                     || (xdst.top >= (int)dst_surface->resource.height)
6567                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6568                     || (xsrc.left >= (int)src_surface->resource.width)
6569                     || (xsrc.top >= (int)src_surface->resource.height))
6570             {
6571                 TRACE("Nothing to be done after clipping.\n");
6572                 return WINED3D_OK;
6573             }
6574         }
6575     }
6576
6577     if (src_surface == dst_surface)
6578     {
6579         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6580         slock = dlock;
6581         src_format = dst_surface->resource.format;
6582         dst_format = src_format;
6583     }
6584     else
6585     {
6586         dst_format = dst_surface->resource.format;
6587         if (src_surface)
6588         {
6589             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6590             {
6591                 src_surface = surface_convert_format(src_surface, dst_format->id);
6592                 if (!src_surface)
6593                 {
6594                     /* The conv function writes a FIXME */
6595                     WARN("Cannot convert source surface format to dest format.\n");
6596                     goto release;
6597                 }
6598             }
6599             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6600             src_format = src_surface->resource.format;
6601         }
6602         else
6603         {
6604             src_format = dst_format;
6605         }
6606         if (dst_rect)
6607             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6608         else
6609             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6610     }
6611
6612     bpp = dst_surface->resource.format->byte_count;
6613     srcheight = xsrc.bottom - xsrc.top;
6614     srcwidth = xsrc.right - xsrc.left;
6615     dstheight = xdst.bottom - xdst.top;
6616     dstwidth = xdst.right - xdst.left;
6617     width = (xdst.right - xdst.left) * bpp;
6618
6619     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6620     {
6621         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6622
6623         if (src_surface == dst_surface)
6624         {
6625             FIXME("Only plain blits supported on compressed surfaces.\n");
6626             hr = E_NOTIMPL;
6627             goto release;
6628         }
6629
6630         if (srcheight != dstheight || srcwidth != dstwidth)
6631         {
6632             WARN("Stretching not supported on compressed surfaces.\n");
6633             hr = WINED3DERR_INVALIDCALL;
6634             goto release;
6635         }
6636
6637         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6638         {
6639             WARN("Rectangle not block-aligned.\n");
6640             hr = WINED3DERR_INVALIDCALL;
6641             goto release;
6642         }
6643
6644         hr = surface_cpu_blt_compressed(slock.pBits, dlock.pBits,
6645                 slock.Pitch, dlock.Pitch, dstwidth, dstheight,
6646                 src_format, flags, fx);
6647         goto release;
6648     }
6649
6650     if (dst_rect && src_surface != dst_surface)
6651         dbuf = dlock.pBits;
6652     else
6653         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6654
6655     /* First, all the 'source-less' blits */
6656     if (flags & WINEDDBLT_COLORFILL)
6657     {
6658         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6659         flags &= ~WINEDDBLT_COLORFILL;
6660     }
6661
6662     if (flags & WINEDDBLT_DEPTHFILL)
6663     {
6664         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6665     }
6666     if (flags & WINEDDBLT_ROP)
6667     {
6668         /* Catch some degenerate cases here. */
6669         switch (fx->dwROP)
6670         {
6671             case BLACKNESS:
6672                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6673                 break;
6674             case 0xAA0029: /* No-op */
6675                 break;
6676             case WHITENESS:
6677                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6678                 break;
6679             case SRCCOPY: /* Well, we do that below? */
6680                 break;
6681             default:
6682                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6683                 goto error;
6684         }
6685         flags &= ~WINEDDBLT_ROP;
6686     }
6687     if (flags & WINEDDBLT_DDROPS)
6688     {
6689         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6690     }
6691     /* Now the 'with source' blits. */
6692     if (src_surface)
6693     {
6694         const BYTE *sbase;
6695         int sx, xinc, sy, yinc;
6696
6697         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6698             goto release;
6699
6700         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6701                 && (srcwidth != dstwidth || srcheight != dstheight))
6702         {
6703             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6704             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6705         }
6706
6707         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6708         xinc = (srcwidth << 16) / dstwidth;
6709         yinc = (srcheight << 16) / dstheight;
6710
6711         if (!flags)
6712         {
6713             /* No effects, we can cheat here. */
6714             if (dstwidth == srcwidth)
6715             {
6716                 if (dstheight == srcheight)
6717                 {
6718                     /* No stretching in either direction. This needs to be as
6719                      * fast as possible. */
6720                     sbuf = sbase;
6721
6722                     /* Check for overlapping surfaces. */
6723                     if (src_surface != dst_surface || xdst.top < xsrc.top
6724                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6725                     {
6726                         /* No overlap, or dst above src, so copy from top downwards. */
6727                         for (y = 0; y < dstheight; ++y)
6728                         {
6729                             memcpy(dbuf, sbuf, width);
6730                             sbuf += slock.Pitch;
6731                             dbuf += dlock.Pitch;
6732                         }
6733                     }
6734                     else if (xdst.top > xsrc.top)
6735                     {
6736                         /* Copy from bottom upwards. */
6737                         sbuf += (slock.Pitch*dstheight);
6738                         dbuf += (dlock.Pitch*dstheight);
6739                         for (y = 0; y < dstheight; ++y)
6740                         {
6741                             sbuf -= slock.Pitch;
6742                             dbuf -= dlock.Pitch;
6743                             memcpy(dbuf, sbuf, width);
6744                         }
6745                     }
6746                     else
6747                     {
6748                         /* Src and dst overlapping on the same line, use memmove. */
6749                         for (y = 0; y < dstheight; ++y)
6750                         {
6751                             memmove(dbuf, sbuf, width);
6752                             sbuf += slock.Pitch;
6753                             dbuf += dlock.Pitch;
6754                         }
6755                     }
6756                 }
6757                 else
6758                 {
6759                     /* Stretching in y direction only. */
6760                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6761                     {
6762                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6763                         memcpy(dbuf, sbuf, width);
6764                         dbuf += dlock.Pitch;
6765                     }
6766                 }
6767             }
6768             else
6769             {
6770                 /* Stretching in X direction. */
6771                 int last_sy = -1;
6772                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6773                 {
6774                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6775
6776                     if ((sy >> 16) == (last_sy >> 16))
6777                     {
6778                         /* This source row is the same as last source row -
6779                          * Copy the already stretched row. */
6780                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6781                     }
6782                     else
6783                     {
6784 #define STRETCH_ROW(type) \
6785 do { \
6786     const type *s = (const type *)sbuf; \
6787     type *d = (type *)dbuf; \
6788     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6789         d[x] = s[sx >> 16]; \
6790 } while(0)
6791
6792                         switch(bpp)
6793                         {
6794                             case 1:
6795                                 STRETCH_ROW(BYTE);
6796                                 break;
6797                             case 2:
6798                                 STRETCH_ROW(WORD);
6799                                 break;
6800                             case 4:
6801                                 STRETCH_ROW(DWORD);
6802                                 break;
6803                             case 3:
6804                             {
6805                                 const BYTE *s;
6806                                 BYTE *d = dbuf;
6807                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6808                                 {
6809                                     DWORD pixel;
6810
6811                                     s = sbuf + 3 * (sx >> 16);
6812                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6813                                     d[0] = (pixel      ) & 0xff;
6814                                     d[1] = (pixel >>  8) & 0xff;
6815                                     d[2] = (pixel >> 16) & 0xff;
6816                                     d += 3;
6817                                 }
6818                                 break;
6819                             }
6820                             default:
6821                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6822                                 hr = WINED3DERR_NOTAVAILABLE;
6823                                 goto error;
6824                         }
6825 #undef STRETCH_ROW
6826                     }
6827                     dbuf += dlock.Pitch;
6828                     last_sy = sy;
6829                 }
6830             }
6831         }
6832         else
6833         {
6834             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6835             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6836             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6837             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6838             {
6839                 /* The color keying flags are checked for correctness in ddraw */
6840                 if (flags & WINEDDBLT_KEYSRC)
6841                 {
6842                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6843                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6844                 }
6845                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6846                 {
6847                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6848                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6849                 }
6850
6851                 if (flags & WINEDDBLT_KEYDEST)
6852                 {
6853                     /* Destination color keys are taken from the source surface! */
6854                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6855                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6856                 }
6857                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6858                 {
6859                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6860                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6861                 }
6862
6863                 if (bpp == 1)
6864                 {
6865                     keymask = 0xff;
6866                 }
6867                 else
6868                 {
6869                     keymask = src_format->red_mask
6870                             | src_format->green_mask
6871                             | src_format->blue_mask;
6872                 }
6873                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6874             }
6875
6876             if (flags & WINEDDBLT_DDFX)
6877             {
6878                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6879                 LONG tmpxy;
6880                 dTopLeft     = dbuf;
6881                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6882                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6883                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6884
6885                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6886                 {
6887                     /* I don't think we need to do anything about this flag */
6888                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6889                 }
6890                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6891                 {
6892                     tmp          = dTopRight;
6893                     dTopRight    = dTopLeft;
6894                     dTopLeft     = tmp;
6895                     tmp          = dBottomRight;
6896                     dBottomRight = dBottomLeft;
6897                     dBottomLeft  = tmp;
6898                     dstxinc = dstxinc * -1;
6899                 }
6900                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6901                 {
6902                     tmp          = dTopLeft;
6903                     dTopLeft     = dBottomLeft;
6904                     dBottomLeft  = tmp;
6905                     tmp          = dTopRight;
6906                     dTopRight    = dBottomRight;
6907                     dBottomRight = tmp;
6908                     dstyinc = dstyinc * -1;
6909                 }
6910                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6911                 {
6912                     /* I don't think we need to do anything about this flag */
6913                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6914                 }
6915                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6916                 {
6917                     tmp          = dBottomRight;
6918                     dBottomRight = dTopLeft;
6919                     dTopLeft     = tmp;
6920                     tmp          = dBottomLeft;
6921                     dBottomLeft  = dTopRight;
6922                     dTopRight    = tmp;
6923                     dstxinc = dstxinc * -1;
6924                     dstyinc = dstyinc * -1;
6925                 }
6926                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6927                 {
6928                     tmp          = dTopLeft;
6929                     dTopLeft     = dBottomLeft;
6930                     dBottomLeft  = dBottomRight;
6931                     dBottomRight = dTopRight;
6932                     dTopRight    = tmp;
6933                     tmpxy   = dstxinc;
6934                     dstxinc = dstyinc;
6935                     dstyinc = tmpxy;
6936                     dstxinc = dstxinc * -1;
6937                 }
6938                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6939                 {
6940                     tmp          = dTopLeft;
6941                     dTopLeft     = dTopRight;
6942                     dTopRight    = dBottomRight;
6943                     dBottomRight = dBottomLeft;
6944                     dBottomLeft  = tmp;
6945                     tmpxy   = dstxinc;
6946                     dstxinc = dstyinc;
6947                     dstyinc = tmpxy;
6948                     dstyinc = dstyinc * -1;
6949                 }
6950                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6951                 {
6952                     /* I don't think we need to do anything about this flag */
6953                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6954                 }
6955                 dbuf = dTopLeft;
6956                 flags &= ~(WINEDDBLT_DDFX);
6957             }
6958
6959 #define COPY_COLORKEY_FX(type) \
6960 do { \
6961     const type *s; \
6962     type *d = (type *)dbuf, *dx, tmp; \
6963     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6964     { \
6965         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6966         dx = d; \
6967         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6968         { \
6969             tmp = s[sx >> 16]; \
6970             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6971                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6972             { \
6973                 dx[0] = tmp; \
6974             } \
6975             dx = (type *)(((BYTE *)dx) + dstxinc); \
6976         } \
6977         d = (type *)(((BYTE *)d) + dstyinc); \
6978     } \
6979 } while(0)
6980
6981             switch (bpp)
6982             {
6983                 case 1:
6984                     COPY_COLORKEY_FX(BYTE);
6985                     break;
6986                 case 2:
6987                     COPY_COLORKEY_FX(WORD);
6988                     break;
6989                 case 4:
6990                     COPY_COLORKEY_FX(DWORD);
6991                     break;
6992                 case 3:
6993                 {
6994                     const BYTE *s;
6995                     BYTE *d = dbuf, *dx;
6996                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6997                     {
6998                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6999                         dx = d;
7000                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7001                         {
7002                             DWORD pixel, dpixel = 0;
7003                             s = sbuf + 3 * (sx>>16);
7004                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7005                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7006                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7007                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7008                             {
7009                                 dx[0] = (pixel      ) & 0xff;
7010                                 dx[1] = (pixel >>  8) & 0xff;
7011                                 dx[2] = (pixel >> 16) & 0xff;
7012                             }
7013                             dx += dstxinc;
7014                         }
7015                         d += dstyinc;
7016                     }
7017                     break;
7018                 }
7019                 default:
7020                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7021                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7022                     hr = WINED3DERR_NOTAVAILABLE;
7023                     goto error;
7024 #undef COPY_COLORKEY_FX
7025             }
7026         }
7027     }
7028
7029 error:
7030     if (flags && FIXME_ON(d3d_surface))
7031     {
7032         FIXME("\tUnsupported flags: %#x.\n", flags);
7033     }
7034
7035 release:
7036     wined3d_surface_unmap(dst_surface);
7037     if (src_surface && src_surface != dst_surface)
7038         wined3d_surface_unmap(src_surface);
7039     /* Release the converted surface, if any. */
7040     if (src_surface && src_surface != orig_src)
7041         wined3d_surface_decref(src_surface);
7042
7043     return hr;
7044 }
7045
7046 /* Do not call while under the GL lock. */
7047 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7048         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7049 {
7050     static const RECT src_rect;
7051     WINEDDBLTFX BltFx;
7052
7053     memset(&BltFx, 0, sizeof(BltFx));
7054     BltFx.dwSize = sizeof(BltFx);
7055     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7056     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7057             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7058 }
7059
7060 /* Do not call while under the GL lock. */
7061 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7062         struct wined3d_surface *surface, const RECT *rect, float depth)
7063 {
7064     FIXME("Depth filling not implemented by cpu_blit.\n");
7065     return WINED3DERR_INVALIDCALL;
7066 }
7067
7068 const struct blit_shader cpu_blit =  {
7069     cpu_blit_alloc,
7070     cpu_blit_free,
7071     cpu_blit_set,
7072     cpu_blit_unset,
7073     cpu_blit_supported,
7074     cpu_blit_color_fill,
7075     cpu_blit_depth_fill,
7076 };
7077
7078 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7079         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7080         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7081         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7082 {
7083     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7084     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7085     unsigned int resource_size;
7086     HRESULT hr;
7087
7088     if (multisample_quality > 0)
7089     {
7090         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7091         multisample_quality = 0;
7092     }
7093
7094     /* Quick lockable sanity check.
7095      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7096      * this function is too deep to need to care about things like this.
7097      * Levels need to be checked too, since they all affect what can be done. */
7098     switch (pool)
7099     {
7100         case WINED3DPOOL_SCRATCH:
7101             if (!lockable)
7102             {
7103                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7104                         "which are mutually exclusive, setting lockable to TRUE.\n");
7105                 lockable = TRUE;
7106             }
7107             break;
7108
7109         case WINED3DPOOL_SYSTEMMEM:
7110             if (!lockable)
7111                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7112             break;
7113
7114         case WINED3DPOOL_MANAGED:
7115             if (usage & WINED3DUSAGE_DYNAMIC)
7116                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7117             break;
7118
7119         case WINED3DPOOL_DEFAULT:
7120             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7121                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7122             break;
7123
7124         default:
7125             FIXME("Unknown pool %#x.\n", pool);
7126             break;
7127     };
7128
7129     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7130         FIXME("Trying to create a render target that isn't in the default pool.\n");
7131
7132     /* FIXME: Check that the format is supported by the device. */
7133
7134     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7135     if (!resource_size)
7136         return WINED3DERR_INVALIDCALL;
7137
7138     surface->surface_type = surface_type;
7139
7140     switch (surface_type)
7141     {
7142         case SURFACE_OPENGL:
7143             surface->surface_ops = &surface_ops;
7144             break;
7145
7146         case SURFACE_GDI:
7147             surface->surface_ops = &gdi_surface_ops;
7148             break;
7149
7150         default:
7151             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7152             return WINED3DERR_INVALIDCALL;
7153     }
7154
7155     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7156             multisample_type, multisample_quality, usage, pool, width, height, 1,
7157             resource_size, parent, parent_ops, &surface_resource_ops);
7158     if (FAILED(hr))
7159     {
7160         WARN("Failed to initialize resource, returning %#x.\n", hr);
7161         return hr;
7162     }
7163
7164     /* "Standalone" surface. */
7165     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7166
7167     surface->texture_level = level;
7168     list_init(&surface->overlays);
7169
7170     /* Flags */
7171     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7172     if (discard)
7173         surface->flags |= SFLAG_DISCARD;
7174     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7175         surface->flags |= SFLAG_LOCKABLE;
7176     /* I'm not sure if this qualifies as a hack or as an optimization. It
7177      * seems reasonable to assume that lockable render targets will get
7178      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7179      * creation. However, the other reason we want to do this is that several
7180      * ddraw applications access surface memory while the surface isn't
7181      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7182      * future locks prevents these from crashing. */
7183     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7184         surface->flags |= SFLAG_DYNLOCK;
7185
7186     /* Mark the texture as dirty so that it gets loaded first time around. */
7187     surface_add_dirty_rect(surface, NULL);
7188     list_init(&surface->renderbuffers);
7189
7190     TRACE("surface %p, memory %p, size %u\n",
7191             surface, surface->resource.allocatedMemory, surface->resource.size);
7192
7193     /* Call the private setup routine */
7194     hr = surface->surface_ops->surface_private_setup(surface);
7195     if (FAILED(hr))
7196     {
7197         ERR("Private setup failed, returning %#x\n", hr);
7198         surface->surface_ops->surface_cleanup(surface);
7199         return hr;
7200     }
7201
7202     return hr;
7203 }
7204
7205 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7206         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7207         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7208         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7209 {
7210     struct wined3d_surface *object;
7211     HRESULT hr;
7212
7213     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7214             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7215     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7216             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7217     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7218
7219     if (surface_type == SURFACE_OPENGL && !device->adapter)
7220     {
7221         ERR("OpenGL surfaces are not available without OpenGL.\n");
7222         return WINED3DERR_NOTAVAILABLE;
7223     }
7224
7225     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7226     if (!object)
7227     {
7228         ERR("Failed to allocate surface memory.\n");
7229         return WINED3DERR_OUTOFVIDEOMEMORY;
7230     }
7231
7232     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7233             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7234     if (FAILED(hr))
7235     {
7236         WARN("Failed to initialize surface, returning %#x.\n", hr);
7237         HeapFree(GetProcessHeap(), 0, object);
7238         return hr;
7239     }
7240
7241     TRACE("Created surface %p.\n", object);
7242     *surface = object;
7243
7244     return hr;
7245 }