wined3d: Use draw_binding in surface_unmap().
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     TRACE("surface %p.\n", surface);
46
47     if (surface->texture_name || (surface->flags & SFLAG_PBO) || !list_empty(&surface->renderbuffers))
48     {
49         struct wined3d_renderbuffer_entry *entry, *entry2;
50         const struct wined3d_gl_info *gl_info;
51         struct wined3d_context *context;
52
53         context = context_acquire(surface->resource.device, NULL);
54         gl_info = context->gl_info;
55
56         ENTER_GL();
57
58         if (surface->texture_name)
59         {
60             TRACE("Deleting texture %u.\n", surface->texture_name);
61             glDeleteTextures(1, &surface->texture_name);
62         }
63
64         if (surface->flags & SFLAG_PBO)
65         {
66             TRACE("Deleting PBO %u.\n", surface->pbo);
67             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
68         }
69
70         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
71         {
72             TRACE("Deleting renderbuffer %u.\n", entry->id);
73             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
74             HeapFree(GetProcessHeap(), 0, entry);
75         }
76
77         LEAVE_GL();
78
79         context_release(context);
80     }
81
82     if (surface->flags & SFLAG_DIBSECTION)
83     {
84         /* Release the DC. */
85         SelectObject(surface->hDC, surface->dib.holdbitmap);
86         DeleteDC(surface->hDC);
87         /* Release the DIB section. */
88         DeleteObject(surface->dib.DIBsection);
89         surface->dib.bitmap_data = NULL;
90         surface->resource.allocatedMemory = NULL;
91     }
92
93     if (surface->flags & SFLAG_USERPTR)
94         wined3d_surface_set_mem(surface, NULL);
95     if (surface->overlay_dest)
96         list_remove(&surface->overlay_entry);
97
98     HeapFree(GetProcessHeap(), 0, surface->palette9);
99
100     resource_cleanup(&surface->resource);
101 }
102
103 void surface_update_draw_binding(struct wined3d_surface *surface)
104 {
105     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
106         surface->draw_binding = SFLAG_INDRAWABLE;
107     else
108         surface->draw_binding = SFLAG_INTEXTURE;
109 }
110
111 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
112 {
113     TRACE("surface %p, container %p.\n", surface, container);
114
115     if (!container && type != WINED3D_CONTAINER_NONE)
116         ERR("Setting NULL container of type %#x.\n", type);
117
118     if (type == WINED3D_CONTAINER_SWAPCHAIN)
119     {
120         surface->get_drawable_size = get_drawable_size_swapchain;
121     }
122     else
123     {
124         switch (wined3d_settings.offscreen_rendering_mode)
125         {
126             case ORM_FBO:
127                 surface->get_drawable_size = get_drawable_size_fbo;
128                 break;
129
130             case ORM_BACKBUFFER:
131                 surface->get_drawable_size = get_drawable_size_backbuffer;
132                 break;
133
134             default:
135                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
136                 return;
137         }
138     }
139
140     surface->container.type = type;
141     surface->container.u.base = container;
142     surface_update_draw_binding(surface);
143 }
144
145 struct blt_info
146 {
147     GLenum binding;
148     GLenum bind_target;
149     enum tex_types tex_type;
150     GLfloat coords[4][3];
151 };
152
/* Floating point rectangle; cube_coords_float() fills it with edges mapped
 * to the [-1.0, 1.0] range. */
struct float_rect
{
    float l;    /* left */
    float t;    /* top */
    float r;    /* right */
    float b;    /* bottom */
};
160
161 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
162 {
163     f->l = ((r->left * 2.0f) / w) - 1.0f;
164     f->t = ((r->top * 2.0f) / h) - 1.0f;
165     f->r = ((r->right * 2.0f) / w) - 1.0f;
166     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
167 }
168
169 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
170 {
171     GLfloat (*coords)[3] = info->coords;
172     struct float_rect f;
173
174     switch (target)
175     {
176         default:
177             FIXME("Unsupported texture target %#x\n", target);
178             /* Fall back to GL_TEXTURE_2D */
179         case GL_TEXTURE_2D:
180             info->binding = GL_TEXTURE_BINDING_2D;
181             info->bind_target = GL_TEXTURE_2D;
182             info->tex_type = tex_2d;
183             coords[0][0] = (float)rect->left / w;
184             coords[0][1] = (float)rect->top / h;
185             coords[0][2] = 0.0f;
186
187             coords[1][0] = (float)rect->right / w;
188             coords[1][1] = (float)rect->top / h;
189             coords[1][2] = 0.0f;
190
191             coords[2][0] = (float)rect->left / w;
192             coords[2][1] = (float)rect->bottom / h;
193             coords[2][2] = 0.0f;
194
195             coords[3][0] = (float)rect->right / w;
196             coords[3][1] = (float)rect->bottom / h;
197             coords[3][2] = 0.0f;
198             break;
199
200         case GL_TEXTURE_RECTANGLE_ARB:
201             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
202             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
203             info->tex_type = tex_rect;
204             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
205             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
206             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
207             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
208             break;
209
210         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
211             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
212             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
213             info->tex_type = tex_cube;
214             cube_coords_float(rect, w, h, &f);
215
216             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
217             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
218             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
219             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
220             break;
221
222         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
223             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
224             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
225             info->tex_type = tex_cube;
226             cube_coords_float(rect, w, h, &f);
227
228             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
229             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
230             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
231             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
232             break;
233
234         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
235             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
236             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
237             info->tex_type = tex_cube;
238             cube_coords_float(rect, w, h, &f);
239
240             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
241             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
242             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
243             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
244             break;
245
246         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
247             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
248             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
249             info->tex_type = tex_cube;
250             cube_coords_float(rect, w, h, &f);
251
252             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
253             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
254             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
255             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
256             break;
257
258         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
259             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
260             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
261             info->tex_type = tex_cube;
262             cube_coords_float(rect, w, h, &f);
263
264             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
265             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
266             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
267             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
268             break;
269
270         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
271             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
272             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
273             info->tex_type = tex_cube;
274             cube_coords_float(rect, w, h, &f);
275
276             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
277             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
278             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
279             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
280             break;
281     }
282 }
283
284 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
285 {
286     if (rect_in)
287         *rect_out = *rect_in;
288     else
289     {
290         rect_out->left = 0;
291         rect_out->top = 0;
292         rect_out->right = surface->resource.width;
293         rect_out->bottom = surface->resource.height;
294     }
295 }
296
297 /* GL locking and context activation is done by the caller */
298 void draw_textured_quad(const struct wined3d_surface *src_surface, const RECT *src_rect,
299         const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
300 {
301     struct blt_info info;
302
303     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
304
305     glEnable(info.bind_target);
306     checkGLcall("glEnable(bind_target)");
307
308     /* Bind the texture */
309     glBindTexture(info.bind_target, src_surface->texture_name);
310     checkGLcall("glBindTexture");
311
312     /* Filtering for StretchRect */
313     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
314             wined3d_gl_mag_filter(magLookup, Filter));
315     checkGLcall("glTexParameteri");
316     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
317             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
318     checkGLcall("glTexParameteri");
319     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
320     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
321     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
322     checkGLcall("glTexEnvi");
323
324     /* Draw a quad */
325     glBegin(GL_TRIANGLE_STRIP);
326     glTexCoord3fv(info.coords[0]);
327     glVertex2i(dst_rect->left, dst_rect->top);
328
329     glTexCoord3fv(info.coords[1]);
330     glVertex2i(dst_rect->right, dst_rect->top);
331
332     glTexCoord3fv(info.coords[2]);
333     glVertex2i(dst_rect->left, dst_rect->bottom);
334
335     glTexCoord3fv(info.coords[3]);
336     glVertex2i(dst_rect->right, dst_rect->bottom);
337     glEnd();
338
339     /* Unbind the texture */
340     glBindTexture(info.bind_target, 0);
341     checkGLcall("glBindTexture(info->bind_target, 0)");
342
343     /* We changed the filtering settings on the texture. Inform the
344      * container about this to get the filters reset properly next draw. */
345     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
346     {
347         struct wined3d_texture *texture = src_surface->container.u.texture;
348         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
349         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
350         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
351     }
352 }
353
354 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
355 {
356     const struct wined3d_format *format = surface->resource.format;
357     SYSTEM_INFO sysInfo;
358     BITMAPINFO *b_info;
359     int extraline = 0;
360     DWORD *masks;
361     UINT usage;
362     HDC dc;
363
364     TRACE("surface %p.\n", surface);
365
366     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
367     {
368         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
369         return WINED3DERR_INVALIDCALL;
370     }
371
372     switch (format->byte_count)
373     {
374         case 2:
375         case 4:
376             /* Allocate extra space to store the RGB bit masks. */
377             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
378             break;
379
380         case 3:
381             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
382             break;
383
384         default:
385             /* Allocate extra space for a palette. */
386             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
387                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
388             break;
389     }
390
391     if (!b_info)
392         return E_OUTOFMEMORY;
393
394     /* Some applications access the surface in via DWORDs, and do not take
395      * the necessary care at the end of the surface. So we need at least
396      * 4 extra bytes at the end of the surface. Check against the page size,
397      * if the last page used for the surface has at least 4 spare bytes we're
398      * safe, otherwise add an extra line to the DIB section. */
399     GetSystemInfo(&sysInfo);
400     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
401     {
402         extraline = 1;
403         TRACE("Adding an extra line to the DIB section.\n");
404     }
405
406     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
407     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
408     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
409     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
410     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
411             * wined3d_surface_get_pitch(surface);
412     b_info->bmiHeader.biPlanes = 1;
413     b_info->bmiHeader.biBitCount = format->byte_count * 8;
414
415     b_info->bmiHeader.biXPelsPerMeter = 0;
416     b_info->bmiHeader.biYPelsPerMeter = 0;
417     b_info->bmiHeader.biClrUsed = 0;
418     b_info->bmiHeader.biClrImportant = 0;
419
420     /* Get the bit masks */
421     masks = (DWORD *)b_info->bmiColors;
422     switch (surface->resource.format->id)
423     {
424         case WINED3DFMT_B8G8R8_UNORM:
425             usage = DIB_RGB_COLORS;
426             b_info->bmiHeader.biCompression = BI_RGB;
427             break;
428
429         case WINED3DFMT_B5G5R5X1_UNORM:
430         case WINED3DFMT_B5G5R5A1_UNORM:
431         case WINED3DFMT_B4G4R4A4_UNORM:
432         case WINED3DFMT_B4G4R4X4_UNORM:
433         case WINED3DFMT_B2G3R3_UNORM:
434         case WINED3DFMT_B2G3R3A8_UNORM:
435         case WINED3DFMT_R10G10B10A2_UNORM:
436         case WINED3DFMT_R8G8B8A8_UNORM:
437         case WINED3DFMT_R8G8B8X8_UNORM:
438         case WINED3DFMT_B10G10R10A2_UNORM:
439         case WINED3DFMT_B5G6R5_UNORM:
440         case WINED3DFMT_R16G16B16A16_UNORM:
441             usage = 0;
442             b_info->bmiHeader.biCompression = BI_BITFIELDS;
443             masks[0] = format->red_mask;
444             masks[1] = format->green_mask;
445             masks[2] = format->blue_mask;
446             break;
447
448         default:
449             /* Don't know palette */
450             b_info->bmiHeader.biCompression = BI_RGB;
451             usage = 0;
452             break;
453     }
454
455     if (!(dc = GetDC(0)))
456     {
457         HeapFree(GetProcessHeap(), 0, b_info);
458         return HRESULT_FROM_WIN32(GetLastError());
459     }
460
461     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
462             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
463             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
464     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
465     ReleaseDC(0, dc);
466
467     if (!surface->dib.DIBsection)
468     {
469         ERR("Failed to create DIB section.\n");
470         HeapFree(GetProcessHeap(), 0, b_info);
471         return HRESULT_FROM_WIN32(GetLastError());
472     }
473
474     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
475     /* Copy the existing surface to the dib section. */
476     if (surface->resource.allocatedMemory)
477     {
478         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
479                 surface->resource.height * wined3d_surface_get_pitch(surface));
480     }
481     else
482     {
483         /* This is to make maps read the GL texture although memory is allocated. */
484         surface->flags &= ~SFLAG_INSYSMEM;
485     }
486     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
487
488     HeapFree(GetProcessHeap(), 0, b_info);
489
490     /* Now allocate a DC. */
491     surface->hDC = CreateCompatibleDC(0);
492     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
493     TRACE("Using wined3d palette %p.\n", surface->palette);
494     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
495
496     surface->flags |= SFLAG_DIBSECTION;
497
498     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
499     surface->resource.heapMemory = NULL;
500
501     return WINED3D_OK;
502 }
503
504 static void surface_prepare_system_memory(struct wined3d_surface *surface)
505 {
506     struct wined3d_device *device = surface->resource.device;
507     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
508
509     TRACE("surface %p.\n", surface);
510
511     /* Performance optimization: Count how often a surface is locked, if it is
512      * locked regularly do not throw away the system memory copy. This avoids
513      * the need to download the surface from OpenGL all the time. The surface
514      * is still downloaded if the OpenGL texture is changed. */
515     if (!(surface->flags & SFLAG_DYNLOCK))
516     {
517         if (++surface->lockCount > MAXLOCKCOUNT)
518         {
519             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
520             surface->flags |= SFLAG_DYNLOCK;
521         }
522     }
523
524     /* Create a PBO for dynamically locked surfaces but don't do it for
525      * converted or NPOT surfaces. Also don't create a PBO for systemmem
526      * surfaces. */
527     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
528             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
529             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
530     {
531         struct wined3d_context *context;
532         GLenum error;
533
534         context = context_acquire(device, NULL);
535         ENTER_GL();
536
537         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
538         error = glGetError();
539         if (!surface->pbo || error != GL_NO_ERROR)
540             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
541
542         TRACE("Binding PBO %u.\n", surface->pbo);
543
544         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
545         checkGLcall("glBindBufferARB");
546
547         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
548                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
549         checkGLcall("glBufferDataARB");
550
551         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
552         checkGLcall("glBindBufferARB");
553
554         /* We don't need the system memory anymore and we can't even use it for PBOs. */
555         if (!(surface->flags & SFLAG_CLIENT))
556         {
557             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
558             surface->resource.heapMemory = NULL;
559         }
560         surface->resource.allocatedMemory = NULL;
561         surface->flags |= SFLAG_PBO;
562         LEAVE_GL();
563         context_release(context);
564     }
565     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
566     {
567         /* Whatever surface we have, make sure that there is memory allocated
568          * for the downloaded copy, or a PBO to map. */
569         if (!surface->resource.heapMemory)
570             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
571
572         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
573                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
574
575         if (surface->flags & SFLAG_INSYSMEM)
576             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
577     }
578 }
579
580 static void surface_evict_sysmem(struct wined3d_surface *surface)
581 {
582     if (surface->flags & SFLAG_DONOTFREE)
583         return;
584
585     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
586     surface->resource.allocatedMemory = NULL;
587     surface->resource.heapMemory = NULL;
588     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
589 }
590
591 /* Context activation is done by the caller. */
592 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
593         const struct wined3d_gl_info *gl_info, BOOL srgb)
594 {
595     struct wined3d_device *device = surface->resource.device;
596     DWORD active_sampler;
597     GLint active_texture;
598
599     /* We don't need a specific texture unit, but after binding the texture
600      * the current unit is dirty. Read the unit back instead of switching to
601      * 0, this avoids messing around with the state manager's GL states. The
602      * current texture unit should always be a valid one.
603      *
604      * To be more specific, this is tricky because we can implicitly be
605      * called from sampler() in state.c. This means we can't touch anything
606      * other than whatever happens to be the currently active texture, or we
607      * would risk marking already applied sampler states dirty again.
608      *
609      * TODO: Track the current active texture per GL context instead of using
610      * glGet(). */
611
612     ENTER_GL();
613     glGetIntegerv(GL_ACTIVE_TEXTURE, &active_texture);
614     LEAVE_GL();
615     active_sampler = device->rev_tex_unit_map[active_texture - GL_TEXTURE0_ARB];
616
617     if (active_sampler != WINED3D_UNMAPPED_STAGE)
618         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
619     surface_bind(surface, gl_info, srgb);
620 }
621
622 static void surface_force_reload(struct wined3d_surface *surface)
623 {
624     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
625 }
626
627 static void surface_release_client_storage(struct wined3d_surface *surface)
628 {
629     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
630
631     ENTER_GL();
632     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
633     if (surface->texture_name)
634     {
635         surface_bind_and_dirtify(surface, context->gl_info, FALSE);
636         glTexImage2D(surface->texture_target, surface->texture_level,
637                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
638     }
639     if (surface->texture_name_srgb)
640     {
641         surface_bind_and_dirtify(surface, context->gl_info, TRUE);
642         glTexImage2D(surface->texture_target, surface->texture_level,
643                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
644     }
645     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
646     LEAVE_GL();
647
648     context_release(context);
649
650     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
651     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
652     surface_force_reload(surface);
653 }
654
655 static HRESULT surface_private_setup(struct wined3d_surface *surface)
656 {
657     /* TODO: Check against the maximum texture sizes supported by the video card. */
658     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
659     unsigned int pow2Width, pow2Height;
660
661     TRACE("surface %p.\n", surface);
662
663     surface->texture_name = 0;
664     surface->texture_target = GL_TEXTURE_2D;
665
666     /* Non-power2 support */
667     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
668     {
669         pow2Width = surface->resource.width;
670         pow2Height = surface->resource.height;
671     }
672     else
673     {
674         /* Find the nearest pow2 match */
675         pow2Width = pow2Height = 1;
676         while (pow2Width < surface->resource.width)
677             pow2Width <<= 1;
678         while (pow2Height < surface->resource.height)
679             pow2Height <<= 1;
680     }
681     surface->pow2Width = pow2Width;
682     surface->pow2Height = pow2Height;
683
684     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
685     {
686         /* TODO: Add support for non power two compressed textures. */
687         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
688         {
689             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
690                   surface, surface->resource.width, surface->resource.height);
691             return WINED3DERR_NOTAVAILABLE;
692         }
693     }
694
695     if (pow2Width != surface->resource.width
696             || pow2Height != surface->resource.height)
697     {
698         surface->flags |= SFLAG_NONPOW2;
699     }
700
701     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
702             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
703     {
704         /* One of three options:
705          * 1: Do the same as we do with NPOT and scale the texture, (any
706          *    texture ops would require the texture to be scaled which is
707          *    potentially slow)
708          * 2: Set the texture to the maximum size (bad idea).
709          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
710          * 4: Create the surface, but allow it to be used only for DirectDraw
711          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
712          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
713          *    the render target. */
714         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
715         {
716             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
717             return WINED3DERR_NOTAVAILABLE;
718         }
719
720         /* We should never use this surface in combination with OpenGL! */
721         TRACE("Creating an oversized surface: %ux%u.\n",
722                 surface->pow2Width, surface->pow2Height);
723     }
724     else
725     {
726         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
727          * and EXT_PALETTED_TEXTURE is used in combination with texture
728          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
729          * EXT_PALETTED_TEXTURE doesn't work in combination with
730          * ARB_TEXTURE_RECTANGLE. */
731         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
732                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
733                 && gl_info->supported[EXT_PALETTED_TEXTURE]
734                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
735         {
736             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
737             surface->pow2Width = surface->resource.width;
738             surface->pow2Height = surface->resource.height;
739             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
740         }
741     }
742
743     switch (wined3d_settings.offscreen_rendering_mode)
744     {
745         case ORM_FBO:
746             surface->get_drawable_size = get_drawable_size_fbo;
747             break;
748
749         case ORM_BACKBUFFER:
750             surface->get_drawable_size = get_drawable_size_backbuffer;
751             break;
752
753         default:
754             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
755             return WINED3DERR_INVALIDCALL;
756     }
757
758     surface->flags |= SFLAG_INSYSMEM;
759
760     return WINED3D_OK;
761 }
762
763 static void surface_realize_palette(struct wined3d_surface *surface)
764 {
765     struct wined3d_palette *palette = surface->palette;
766
767     TRACE("surface %p.\n", surface);
768
769     if (!palette) return;
770
771     if (surface->resource.format->id == WINED3DFMT_P8_UINT
772             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
773     {
774         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
775         {
776             /* Make sure the texture is up to date. This call doesn't do
777              * anything if the texture is already up to date. */
778             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
779
780             /* We want to force a palette refresh, so mark the drawable as not being up to date */
781             if (!surface_is_offscreen(surface))
782                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
783         }
784         else
785         {
786             if (!(surface->flags & SFLAG_INSYSMEM))
787             {
788                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
789                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
790             }
791             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
792         }
793     }
794
795     if (surface->flags & SFLAG_DIBSECTION)
796     {
797         RGBQUAD col[256];
798         unsigned int i;
799
800         TRACE("Updating the DC's palette.\n");
801
802         for (i = 0; i < 256; ++i)
803         {
804             col[i].rgbRed   = palette->palents[i].peRed;
805             col[i].rgbGreen = palette->palents[i].peGreen;
806             col[i].rgbBlue  = palette->palents[i].peBlue;
807             col[i].rgbReserved = 0;
808         }
809         SetDIBColorTable(surface->hDC, 0, 256, col);
810     }
811
812     /* Propagate the changes to the drawable when we have a palette. */
813     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
814         surface_load_location(surface, SFLAG_INDRAWABLE, NULL);
815 }
816
817 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
818 {
819     HRESULT hr;
820
821     /* If there's no destination surface there is nothing to do. */
822     if (!surface->overlay_dest)
823         return WINED3D_OK;
824
825     /* Blt calls ModifyLocation on the dest surface, which in turn calls
826      * DrawOverlay to update the overlay. Prevent an endless recursion. */
827     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
828         return WINED3D_OK;
829
830     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
831     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
832             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
833     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
834
835     return hr;
836 }
837
838 static void surface_preload(struct wined3d_surface *surface)
839 {
840     TRACE("surface %p.\n", surface);
841
842     surface_internal_preload(surface, SRGB_ANY);
843 }
844
845 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
846 {
847     struct wined3d_device *device = surface->resource.device;
848     const RECT *pass_rect = rect;
849
850     TRACE("surface %p, rect %s, flags %#x.\n",
851             surface, wine_dbgstr_rect(rect), flags);
852
853     if (flags & WINED3DLOCK_DISCARD)
854     {
855         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
856         surface_prepare_system_memory(surface);
857         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
858     }
859     else
860     {
861         /* surface_load_location() does not check if the rectangle specifies
862          * the full surface. Most callers don't need that, so do it here. */
863         if (rect && !rect->top && !rect->left
864                 && rect->right == surface->resource.width
865                 && rect->bottom == surface->resource.height)
866             pass_rect = NULL;
867
868         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
869                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
870                 || surface == device->fb.render_targets[0])))
871             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
872     }
873
874     if (surface->flags & SFLAG_PBO)
875     {
876         const struct wined3d_gl_info *gl_info;
877         struct wined3d_context *context;
878
879         context = context_acquire(device, NULL);
880         gl_info = context->gl_info;
881
882         ENTER_GL();
883         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
884         checkGLcall("glBindBufferARB");
885
886         /* This shouldn't happen but could occur if some other function
887          * didn't handle the PBO properly. */
888         if (surface->resource.allocatedMemory)
889             ERR("The surface already has PBO memory allocated.\n");
890
891         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
892         checkGLcall("glMapBufferARB");
893
894         /* Make sure the PBO isn't set anymore in order not to break non-PBO
895          * calls. */
896         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
897         checkGLcall("glBindBufferARB");
898
899         LEAVE_GL();
900         context_release(context);
901     }
902
903     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
904     {
905         if (!rect)
906             surface_add_dirty_rect(surface, NULL);
907         else
908         {
909             WINED3DBOX b;
910
911             b.Left = rect->left;
912             b.Top = rect->top;
913             b.Right = rect->right;
914             b.Bottom = rect->bottom;
915             b.Front = 0;
916             b.Back = 1;
917             surface_add_dirty_rect(surface, &b);
918         }
919     }
920 }
921
922 static void surface_unmap(struct wined3d_surface *surface)
923 {
924     struct wined3d_device *device = surface->resource.device;
925     BOOL fullsurface;
926
927     TRACE("surface %p.\n", surface);
928
929     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
930
931     if (surface->flags & SFLAG_PBO)
932     {
933         const struct wined3d_gl_info *gl_info;
934         struct wined3d_context *context;
935
936         TRACE("Freeing PBO memory.\n");
937
938         context = context_acquire(device, NULL);
939         gl_info = context->gl_info;
940
941         ENTER_GL();
942         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
943         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
944         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
945         checkGLcall("glUnmapBufferARB");
946         LEAVE_GL();
947         context_release(context);
948
949         surface->resource.allocatedMemory = NULL;
950     }
951
952     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
953
954     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
955     {
956         TRACE("Not dirtified, nothing to do.\n");
957         goto done;
958     }
959
960     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
961             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
962     {
963         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
964         {
965             static BOOL warned = FALSE;
966             if (!warned)
967             {
968                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
969                 warned = TRUE;
970             }
971             goto done;
972         }
973
974         if (!surface->dirtyRect.left && !surface->dirtyRect.top
975                 && surface->dirtyRect.right == surface->resource.width
976                 && surface->dirtyRect.bottom == surface->resource.height)
977         {
978             fullsurface = TRUE;
979         }
980         else
981         {
982             /* TODO: Proper partial rectangle tracking. */
983             fullsurface = FALSE;
984             surface->flags |= SFLAG_INSYSMEM;
985         }
986
987         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
988
989         /* Partial rectangle tracking is not commonly implemented, it is only
990          * done for render targets. INSYSMEM was set before to tell
991          * surface_load_location() where to read the rectangle from.
992          * Indrawable is set because all modifications from the partial
993          * sysmem copy are written back to the drawable, thus the surface is
994          * merged again in the drawable. The sysmem copy is not fully up to
995          * date because only a subrectangle was read in Map(). */
996         if (!fullsurface)
997         {
998             surface_modify_location(surface, SFLAG_INDRAWABLE, TRUE);
999             surface_evict_sysmem(surface);
1000         }
1001
1002         surface->dirtyRect.left = surface->resource.width;
1003         surface->dirtyRect.top = surface->resource.height;
1004         surface->dirtyRect.right = 0;
1005         surface->dirtyRect.bottom = 0;
1006     }
1007     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1008     {
1009         FIXME("Depth / stencil buffer locking is not implemented.\n");
1010     }
1011
1012 done:
1013     /* Overlays have to be redrawn manually after changes with the GL implementation */
1014     if (surface->overlay_dest)
1015         surface->surface_ops->surface_draw_overlay(surface);
1016 }
1017
1018 static HRESULT surface_getdc(struct wined3d_surface *surface)
1019 {
1020     WINED3DLOCKED_RECT lock;
1021     HRESULT hr;
1022
1023     TRACE("surface %p.\n", surface);
1024
1025     /* Create a DIB section if there isn't a dc yet. */
1026     if (!surface->hDC)
1027     {
1028         if (surface->flags & SFLAG_CLIENT)
1029         {
1030             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1031             surface_release_client_storage(surface);
1032         }
1033         hr = surface_create_dib_section(surface);
1034         if (FAILED(hr))
1035             return WINED3DERR_INVALIDCALL;
1036
1037         /* Use the DIB section from now on if we are not using a PBO. */
1038         if (!(surface->flags & SFLAG_PBO))
1039             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1040     }
1041
1042     /* Map the surface. */
1043     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1044     if (FAILED(hr))
1045         ERR("Map failed, hr %#x.\n", hr);
1046
1047     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1048      * activates the allocatedMemory. */
1049     if (surface->flags & SFLAG_PBO)
1050         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->dib.bitmap_size);
1051
1052     return hr;
1053 }
1054
1055 static HRESULT surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1056 {
1057     TRACE("surface %p, override %p.\n", surface, override);
1058
1059     /* Flipping is only supported on render targets and overlays. */
1060     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
1061     {
1062         WARN("Tried to flip a non-render target, non-overlay surface.\n");
1063         return WINEDDERR_NOTFLIPPABLE;
1064     }
1065
1066     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1067     {
1068         flip_surface(surface, override);
1069
1070         /* Update the overlay if it is visible */
1071         if (surface->overlay_dest)
1072             return surface->surface_ops->surface_draw_overlay(surface);
1073         else
1074             return WINED3D_OK;
1075     }
1076
1077     return WINED3D_OK;
1078 }
1079
1080 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1081 {
1082     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1083         return FALSE;
1084     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1085         return FALSE;
1086     return TRUE;
1087 }
1088
1089 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1090         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1091 {
1092     const struct wined3d_gl_info *gl_info;
1093     struct wined3d_context *context;
1094     DWORD src_mask, dst_mask;
1095     GLbitfield gl_mask;
1096
1097     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1098             device, src_surface, wine_dbgstr_rect(src_rect),
1099             dst_surface, wine_dbgstr_rect(dst_rect));
1100
1101     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1102     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1103
1104     if (src_mask != dst_mask)
1105     {
1106         ERR("Incompatible formats %s and %s.\n",
1107                 debug_d3dformat(src_surface->resource.format->id),
1108                 debug_d3dformat(dst_surface->resource.format->id));
1109         return;
1110     }
1111
1112     if (!src_mask)
1113     {
1114         ERR("Not a depth / stencil format: %s.\n",
1115                 debug_d3dformat(src_surface->resource.format->id));
1116         return;
1117     }
1118
1119     gl_mask = 0;
1120     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1121         gl_mask |= GL_DEPTH_BUFFER_BIT;
1122     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1123         gl_mask |= GL_STENCIL_BUFFER_BIT;
1124
1125     /* Make sure the locations are up-to-date. Loading the destination
1126      * surface isn't required if the entire surface is overwritten. */
1127     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1128     if (!surface_is_full_rect(dst_surface, dst_rect))
1129         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1130
1131     context = context_acquire(device, NULL);
1132     if (!context->valid)
1133     {
1134         context_release(context);
1135         WARN("Invalid context, skipping blit.\n");
1136         return;
1137     }
1138
1139     gl_info = context->gl_info;
1140
1141     ENTER_GL();
1142
1143     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1144     glReadBuffer(GL_NONE);
1145     checkGLcall("glReadBuffer()");
1146     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1147
1148     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1149     context_set_draw_buffer(context, GL_NONE);
1150     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1151
1152     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1153     {
1154         glDepthMask(GL_TRUE);
1155         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1156     }
1157     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1158     {
1159         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1160         {
1161             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1162             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1163         }
1164         glStencilMask(~0U);
1165         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1166     }
1167
1168     glDisable(GL_SCISSOR_TEST);
1169     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1170
1171     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1172             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1173     checkGLcall("glBlitFramebuffer()");
1174
1175     LEAVE_GL();
1176
1177     if (wined3d_settings.strict_draw_ordering)
1178         wglFlush(); /* Flush to ensure ordering across contexts. */
1179
1180     context_release(context);
1181 }
1182
1183 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1184  * Depth / stencil is not supported. */
1185 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1186         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1187         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1188 {
1189     const struct wined3d_gl_info *gl_info;
1190     struct wined3d_context *context;
1191     RECT src_rect, dst_rect;
1192     GLenum gl_filter;
1193     GLenum buffer;
1194
1195     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1196     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1197             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1198     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1199             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1200
1201     src_rect = *src_rect_in;
1202     dst_rect = *dst_rect_in;
1203
1204     switch (filter)
1205     {
1206         case WINED3DTEXF_LINEAR:
1207             gl_filter = GL_LINEAR;
1208             break;
1209
1210         default:
1211             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1212         case WINED3DTEXF_NONE:
1213         case WINED3DTEXF_POINT:
1214             gl_filter = GL_NEAREST;
1215             break;
1216     }
1217
1218     if (src_location == SFLAG_INDRAWABLE && surface_is_offscreen(src_surface))
1219         src_location = SFLAG_INTEXTURE;
1220     if (dst_location == SFLAG_INDRAWABLE && surface_is_offscreen(dst_surface))
1221         dst_location = SFLAG_INTEXTURE;
1222
1223     /* Make sure the locations are up-to-date. Loading the destination
1224      * surface isn't required if the entire surface is overwritten. (And is
1225      * in fact harmful if we're being called by surface_load_location() with
1226      * the purpose of loading the destination surface.) */
1227     surface_load_location(src_surface, src_location, NULL);
1228     if (!surface_is_full_rect(dst_surface, &dst_rect))
1229         surface_load_location(dst_surface, dst_location, NULL);
1230
1231     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1232     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1233     else context = context_acquire(device, NULL);
1234
1235     if (!context->valid)
1236     {
1237         context_release(context);
1238         WARN("Invalid context, skipping blit.\n");
1239         return;
1240     }
1241
1242     gl_info = context->gl_info;
1243
1244     if (src_location == SFLAG_INDRAWABLE)
1245     {
1246         TRACE("Source surface %p is onscreen.\n", src_surface);
1247         buffer = surface_get_gl_buffer(src_surface);
1248         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1249     }
1250     else
1251     {
1252         TRACE("Source surface %p is offscreen.\n", src_surface);
1253         buffer = GL_COLOR_ATTACHMENT0;
1254     }
1255
1256     ENTER_GL();
1257     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1258     glReadBuffer(buffer);
1259     checkGLcall("glReadBuffer()");
1260     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1261     LEAVE_GL();
1262
1263     if (dst_location == SFLAG_INDRAWABLE)
1264     {
1265         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1266         buffer = surface_get_gl_buffer(dst_surface);
1267         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1268     }
1269     else
1270     {
1271         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1272         buffer = GL_COLOR_ATTACHMENT0;
1273     }
1274
1275     ENTER_GL();
1276     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1277     context_set_draw_buffer(context, buffer);
1278     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1279     context_invalidate_state(context, STATE_FRAMEBUFFER);
1280
1281     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1282     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1283     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1284     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1285     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1286
1287     glDisable(GL_SCISSOR_TEST);
1288     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1289
1290     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1291             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1292     checkGLcall("glBlitFramebuffer()");
1293
1294     LEAVE_GL();
1295
1296     if (wined3d_settings.strict_draw_ordering
1297             || (dst_location == SFLAG_INDRAWABLE
1298             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1299         wglFlush();
1300
1301     context_release(context);
1302 }
1303
1304 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1305         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1306         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1307 {
1308     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1309         return FALSE;
1310
1311     /* Source and/or destination need to be on the GL side */
1312     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1313         return FALSE;
1314
1315     switch (blit_op)
1316     {
1317         case WINED3D_BLIT_OP_COLOR_BLIT:
1318             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1319                 return FALSE;
1320             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1321                 return FALSE;
1322             break;
1323
1324         case WINED3D_BLIT_OP_DEPTH_BLIT:
1325             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1326                 return FALSE;
1327             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1328                 return FALSE;
1329             break;
1330
1331         default:
1332             return FALSE;
1333     }
1334
1335     if (!(src_format->id == dst_format->id
1336             || (is_identity_fixup(src_format->color_fixup)
1337             && is_identity_fixup(dst_format->color_fixup))))
1338         return FALSE;
1339
1340     return TRUE;
1341 }
1342
1343 /* This function checks if the primary render target uses the 8bit paletted format. */
1344 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1345 {
1346     if (device->fb.render_targets && device->fb.render_targets[0])
1347     {
1348         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1349         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1350                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1351             return TRUE;
1352     }
1353     return FALSE;
1354 }
1355
1356 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1357         DWORD color, WINED3DCOLORVALUE *float_color)
1358 {
1359     const struct wined3d_format *format = surface->resource.format;
1360     const struct wined3d_device *device = surface->resource.device;
1361
1362     switch (format->id)
1363     {
1364         case WINED3DFMT_P8_UINT:
1365             if (surface->palette)
1366             {
1367                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1368                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1369                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1370             }
1371             else
1372             {
1373                 float_color->r = 0.0f;
1374                 float_color->g = 0.0f;
1375                 float_color->b = 0.0f;
1376             }
1377             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1378             break;
1379
1380         case WINED3DFMT_B5G6R5_UNORM:
1381             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1382             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1383             float_color->b = (color & 0x1f) / 31.0f;
1384             float_color->a = 1.0f;
1385             break;
1386
1387         case WINED3DFMT_B8G8R8_UNORM:
1388         case WINED3DFMT_B8G8R8X8_UNORM:
1389             float_color->r = D3DCOLOR_R(color);
1390             float_color->g = D3DCOLOR_G(color);
1391             float_color->b = D3DCOLOR_B(color);
1392             float_color->a = 1.0f;
1393             break;
1394
1395         case WINED3DFMT_B8G8R8A8_UNORM:
1396             float_color->r = D3DCOLOR_R(color);
1397             float_color->g = D3DCOLOR_G(color);
1398             float_color->b = D3DCOLOR_B(color);
1399             float_color->a = D3DCOLOR_A(color);
1400             break;
1401
1402         default:
1403             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1404             return FALSE;
1405     }
1406
1407     return TRUE;
1408 }
1409
1410 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1411 {
1412     const struct wined3d_format *format = surface->resource.format;
1413
1414     switch (format->id)
1415     {
1416         case WINED3DFMT_S1_UINT_D15_UNORM:
1417             *float_depth = depth / (float)0x00007fff;
1418             break;
1419
1420         case WINED3DFMT_D16_UNORM:
1421             *float_depth = depth / (float)0x0000ffff;
1422             break;
1423
1424         case WINED3DFMT_D24_UNORM_S8_UINT:
1425         case WINED3DFMT_X8D24_UNORM:
1426             *float_depth = depth / (float)0x00ffffff;
1427             break;
1428
1429         case WINED3DFMT_D32_UNORM:
1430             *float_depth = depth / (float)0xffffffff;
1431             break;
1432
1433         default:
1434             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1435             return FALSE;
1436     }
1437
1438     return TRUE;
1439 }
1440
1441 /* Do not call while under the GL lock. */
1442 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1443 {
1444     const struct wined3d_resource *resource = &surface->resource;
1445     struct wined3d_device *device = resource->device;
1446     const struct blit_shader *blitter;
1447
1448     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1449             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1450     if (!blitter)
1451     {
1452         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1453         return WINED3DERR_INVALIDCALL;
1454     }
1455
1456     return blitter->depth_fill(device, surface, rect, depth);
1457 }
1458
1459 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1460         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1461 {
1462     struct wined3d_device *device = src_surface->resource.device;
1463
1464     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1465             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1466             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1467         return WINED3DERR_INVALIDCALL;
1468
1469     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1470
1471     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1472             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1473     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1474
1475     return WINED3D_OK;
1476 }
1477
1478 /* Do not call while under the GL lock. */
1479 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1480         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1481         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1482 {
1483     const struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1484     struct wined3d_device *device = dst_surface->resource.device;
1485     DWORD src_ds_flags, dst_ds_flags;
1486     RECT src_rect, dst_rect;
1487
1488     static const DWORD simple_blit = WINEDDBLT_ASYNC
1489             | WINEDDBLT_COLORFILL
1490             | WINEDDBLT_WAIT
1491             | WINEDDBLT_DEPTHFILL
1492             | WINEDDBLT_DONOTWAIT;
1493
1494     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1495             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1496             flags, fx, debug_d3dtexturefiltertype(filter));
1497     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1498
1499     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1500     {
1501         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1502         return WINEDDERR_SURFACEBUSY;
1503     }
1504
1505     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1506
1507     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1508             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1509             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1510             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1511             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1512     {
1513         /* The destination rect can be out of bounds on the condition
1514          * that a clipper is set for the surface. */
1515         if (dst_surface->clipper)
1516             FIXME("Blit clipping not implemented.\n");
1517         else
1518             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1519         return WINEDDERR_INVALIDRECT;
1520     }
1521
1522     if (src_surface)
1523     {
1524         surface_get_rect(src_surface, src_rect_in, &src_rect);
1525
1526         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1527                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1528                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1529                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1530                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1531         {
1532             WARN("Application gave us bad source rectangle for Blt.\n");
1533             return WINEDDERR_INVALIDRECT;
1534         }
1535     }
1536     else
1537     {
1538         memset(&src_rect, 0, sizeof(src_rect));
1539     }
1540
1541     if (!fx || !(fx->dwDDFX))
1542         flags &= ~WINEDDBLT_DDFX;
1543
1544     if (flags & WINEDDBLT_WAIT)
1545         flags &= ~WINEDDBLT_WAIT;
1546
1547     if (flags & WINEDDBLT_ASYNC)
1548     {
1549         static unsigned int once;
1550
1551         if (!once++)
1552             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1553         flags &= ~WINEDDBLT_ASYNC;
1554     }
1555
1556     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1557     if (flags & WINEDDBLT_DONOTWAIT)
1558     {
1559         static unsigned int once;
1560
1561         if (!once++)
1562             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1563         flags &= ~WINEDDBLT_DONOTWAIT;
1564     }
1565
1566     if (!device->d3d_initialized)
1567     {
1568         WARN("D3D not initialized, using fallback.\n");
1569         goto cpu;
1570     }
1571
1572     if (flags & ~simple_blit)
1573     {
1574         WARN("Using fallback for complex blit (%#x).\n", flags);
1575         goto fallback;
1576     }
1577
1578     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1579         src_swapchain = src_surface->container.u.swapchain;
1580     else
1581         src_swapchain = NULL;
1582
1583     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1584         dst_swapchain = dst_surface->container.u.swapchain;
1585     else
1586         dst_swapchain = NULL;
1587
1588     /* This isn't strictly needed. FBO blits for example could deal with
1589      * cross-swapchain blits by first downloading the source to a texture
1590      * before switching to the destination context. We just have this here to
1591      * not have to deal with the issue, since cross-swapchain blits should be
1592      * rare. */
1593     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1594     {
1595         FIXME("Using fallback for cross-swapchain blit.\n");
1596         goto fallback;
1597     }
1598
1599     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1600     if (src_surface)
1601         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1602     else
1603         src_ds_flags = 0;
1604
1605     if (src_ds_flags || dst_ds_flags)
1606     {
1607         if (flags & WINEDDBLT_DEPTHFILL)
1608         {
1609             float depth;
1610
1611             TRACE("Depth fill.\n");
1612
1613             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1614                 return WINED3DERR_INVALIDCALL;
1615
1616             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1617                 return WINED3D_OK;
1618         }
1619         else
1620         {
1621             /* Accessing depth / stencil surfaces is supposed to fail while in
1622              * a scene, except for fills, which seem to work. */
1623             if (device->inScene)
1624             {
1625                 WARN("Rejecting depth / stencil access while in scene.\n");
1626                 return WINED3DERR_INVALIDCALL;
1627             }
1628
1629             if (src_ds_flags != dst_ds_flags)
1630             {
1631                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1632                 return WINED3DERR_INVALIDCALL;
1633             }
1634
1635             if (src_rect.top || src_rect.left
1636                     || src_rect.bottom != src_surface->resource.height
1637                     || src_rect.right != src_surface->resource.width)
1638             {
1639                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1640                         wine_dbgstr_rect(&src_rect));
1641                 return WINED3DERR_INVALIDCALL;
1642             }
1643
1644             if (dst_rect.top || dst_rect.left
1645                     || dst_rect.bottom != dst_surface->resource.height
1646                     || dst_rect.right != dst_surface->resource.width)
1647             {
1648                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1649                         wine_dbgstr_rect(&src_rect));
1650                 return WINED3DERR_INVALIDCALL;
1651             }
1652
1653             if (src_surface->resource.height != dst_surface->resource.height
1654                     || src_surface->resource.width != dst_surface->resource.width)
1655             {
1656                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1657                 return WINED3DERR_INVALIDCALL;
1658             }
1659
1660             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1661                 return WINED3D_OK;
1662         }
1663     }
1664     else
1665     {
1666         if (flags & WINEDDBLT_COLORFILL)
1667         {
1668             WINED3DCOLORVALUE color;
1669
1670             TRACE("Color fill.\n");
1671
1672             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1673                 goto fallback;
1674
1675             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1676                 return WINED3D_OK;
1677         }
1678         else
1679         {
1680             TRACE("Color blit.\n");
1681
1682             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1683                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1684                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1685             {
1686                 TRACE("Using FBO blit.\n");
1687
1688                 surface_blt_fbo(device, filter,
1689                         src_surface, src_surface->draw_binding, &src_rect,
1690                         dst_surface, dst_surface->draw_binding, &dst_rect);
1691                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1692                 return WINED3D_OK;
1693             }
1694
1695             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1696                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1697                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1698             {
1699                 TRACE("Using arbfp blit.\n");
1700
1701                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1702                     return WINED3D_OK;
1703             }
1704         }
1705     }
1706
1707 fallback:
1708
1709     /* Special cases for render targets. */
1710     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1711             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1712     {
1713         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1714                 src_surface, &src_rect, flags, fx, filter)))
1715             return WINED3D_OK;
1716     }
1717
1718 cpu:
1719
1720     /* For the rest call the X11 surface implementation. For render targets
1721      * this should be implemented OpenGL accelerated in BltOverride, other
1722      * blits are rather rare. */
1723     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1724 }
1725
1726 /* Do not call while under the GL lock. */
1727 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1728         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1729 {
1730     RECT src_rect, dst_rect;
1731     DWORD flags = 0;
1732
1733     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect_in %s, trans %#x.\n",
1734             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1735
1736     surface_get_rect(src_surface, src_rect_in, &src_rect);
1737
1738     dst_rect.left = dst_x;
1739     dst_rect.top = dst_y;
1740     dst_rect.right = dst_x + src_rect.right - src_rect.left;
1741     dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1742
1743     if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1744         flags |= WINEDDBLT_KEYSRC;
1745     if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1746         flags |= WINEDDBLT_KEYDEST;
1747     if (trans & WINEDDBLTFAST_WAIT)
1748         flags |= WINEDDBLT_WAIT;
1749     if (trans & WINEDDBLTFAST_DONOTWAIT)
1750         flags |= WINEDDBLT_DONOTWAIT;
1751
1752     return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT);
1753 }
1754
1755 static HRESULT surface_set_mem(struct wined3d_surface *surface, void *mem)
1756 {
1757     TRACE("surface %p, mem %p.\n", surface, mem);
1758
1759     if (mem && mem != surface->resource.allocatedMemory)
1760     {
1761         void *release = NULL;
1762
1763         /* Do I have to copy the old surface content? */
1764         if (surface->flags & SFLAG_DIBSECTION)
1765         {
1766             SelectObject(surface->hDC, surface->dib.holdbitmap);
1767             DeleteDC(surface->hDC);
1768             /* Release the DIB section. */
1769             DeleteObject(surface->dib.DIBsection);
1770             surface->dib.bitmap_data = NULL;
1771             surface->resource.allocatedMemory = NULL;
1772             surface->hDC = NULL;
1773             surface->flags &= ~SFLAG_DIBSECTION;
1774         }
1775         else if (!(surface->flags & SFLAG_USERPTR))
1776         {
1777             release = surface->resource.heapMemory;
1778             surface->resource.heapMemory = NULL;
1779         }
1780         surface->resource.allocatedMemory = mem;
1781         surface->flags |= SFLAG_USERPTR;
1782
1783         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
1784         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1785
1786         /* For client textures OpenGL has to be notified. */
1787         if (surface->flags & SFLAG_CLIENT)
1788             surface_release_client_storage(surface);
1789
1790         /* Now free the old memory if any. */
1791         HeapFree(GetProcessHeap(), 0, release);
1792     }
1793     else if (surface->flags & SFLAG_USERPTR)
1794     {
1795         /* HeapMemory should be NULL already. */
1796         if (surface->resource.heapMemory)
1797             ERR("User pointer surface has heap memory allocated.\n");
1798
1799         if (!mem)
1800         {
1801             surface->resource.allocatedMemory = NULL;
1802             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
1803
1804             if (surface->flags & SFLAG_CLIENT)
1805                 surface_release_client_storage(surface);
1806
1807             surface_prepare_system_memory(surface);
1808         }
1809
1810         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1811     }
1812
1813     return WINED3D_OK;
1814 }
1815
1816 /* Context activation is done by the caller. */
1817 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1818 {
1819     if (!surface->resource.heapMemory)
1820     {
1821         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1822         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1823                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1824     }
1825
1826     ENTER_GL();
1827     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1828     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1829     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1830             surface->resource.size, surface->resource.allocatedMemory));
1831     checkGLcall("glGetBufferSubDataARB");
1832     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1833     checkGLcall("glDeleteBuffersARB");
1834     LEAVE_GL();
1835
1836     surface->pbo = 0;
1837     surface->flags &= ~SFLAG_PBO;
1838 }
1839
1840 /* Do not call while under the GL lock. */
1841 static void surface_unload(struct wined3d_resource *resource)
1842 {
1843     struct wined3d_surface *surface = surface_from_resource(resource);
1844     struct wined3d_renderbuffer_entry *entry, *entry2;
1845     struct wined3d_device *device = resource->device;
1846     const struct wined3d_gl_info *gl_info;
1847     struct wined3d_context *context;
1848
1849     TRACE("surface %p.\n", surface);
1850
1851     if (resource->pool == WINED3DPOOL_DEFAULT)
1852     {
1853         /* Default pool resources are supposed to be destroyed before Reset is called.
1854          * Implicit resources stay however. So this means we have an implicit render target
1855          * or depth stencil. The content may be destroyed, but we still have to tear down
1856          * opengl resources, so we cannot leave early.
1857          *
1858          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1859          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1860          * or the depth stencil into an FBO the texture or render buffer will be removed
1861          * and all flags get lost
1862          */
1863         surface_init_sysmem(surface);
1864     }
1865     else
1866     {
1867         /* Load the surface into system memory */
1868         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1869         surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
1870     }
1871     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1872     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1873     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1874
1875     context = context_acquire(device, NULL);
1876     gl_info = context->gl_info;
1877
1878     /* Destroy PBOs, but load them into real sysmem before */
1879     if (surface->flags & SFLAG_PBO)
1880         surface_remove_pbo(surface, gl_info);
1881
1882     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1883      * all application-created targets the application has to release the surface
1884      * before calling _Reset
1885      */
1886     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1887     {
1888         ENTER_GL();
1889         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1890         LEAVE_GL();
1891         list_remove(&entry->entry);
1892         HeapFree(GetProcessHeap(), 0, entry);
1893     }
1894     list_init(&surface->renderbuffers);
1895     surface->current_renderbuffer = NULL;
1896
1897     /* If we're in a texture, the texture name belongs to the texture.
1898      * Otherwise, destroy it. */
1899     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1900     {
1901         ENTER_GL();
1902         glDeleteTextures(1, &surface->texture_name);
1903         surface->texture_name = 0;
1904         glDeleteTextures(1, &surface->texture_name_srgb);
1905         surface->texture_name_srgb = 0;
1906         LEAVE_GL();
1907     }
1908
1909     context_release(context);
1910
1911     resource_unload(resource);
1912 }
1913
1914 static const struct wined3d_resource_ops surface_resource_ops =
1915 {
1916     surface_unload,
1917 };
1918
1919 static const struct wined3d_surface_ops surface_ops =
1920 {
1921     surface_private_setup,
1922     surface_cleanup,
1923     surface_realize_palette,
1924     surface_draw_overlay,
1925     surface_preload,
1926     surface_map,
1927     surface_unmap,
1928     surface_getdc,
1929     surface_flip,
1930     surface_set_mem,
1931 };
1932
1933 /*****************************************************************************
1934  * Initializes the GDI surface, aka creates the DIB section we render to
1935  * The DIB section creation is done by calling GetDC, which will create the
1936  * section and releasing the dc to allow the app to use it. The dib section
1937  * will stay until the surface is released
1938  *
1939  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1940  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1941  * avoid confusion in the shared surface code.
1942  *
1943  * Returns:
1944  *  WINED3D_OK on success
1945  *  The return values of called methods on failure
1946  *
1947  *****************************************************************************/
1948 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1949 {
1950     HRESULT hr;
1951
1952     TRACE("surface %p.\n", surface);
1953
1954     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1955     {
1956         ERR("Overlays not yet supported by GDI surfaces.\n");
1957         return WINED3DERR_INVALIDCALL;
1958     }
1959
1960     /* Sysmem textures have memory already allocated - release it,
1961      * this avoids an unnecessary memcpy. */
1962     hr = surface_create_dib_section(surface);
1963     if (SUCCEEDED(hr))
1964     {
1965         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1966         surface->resource.heapMemory = NULL;
1967         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1968     }
1969
1970     /* We don't mind the nonpow2 stuff in GDI. */
1971     surface->pow2Width = surface->resource.width;
1972     surface->pow2Height = surface->resource.height;
1973
1974     return WINED3D_OK;
1975 }
1976
1977 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1978 {
1979     TRACE("surface %p.\n", surface);
1980
1981     if (surface->flags & SFLAG_DIBSECTION)
1982     {
1983         /* Release the DC. */
1984         SelectObject(surface->hDC, surface->dib.holdbitmap);
1985         DeleteDC(surface->hDC);
1986         /* Release the DIB section. */
1987         DeleteObject(surface->dib.DIBsection);
1988         surface->dib.bitmap_data = NULL;
1989         surface->resource.allocatedMemory = NULL;
1990     }
1991
1992     if (surface->flags & SFLAG_USERPTR)
1993         wined3d_surface_set_mem(surface, NULL);
1994     if (surface->overlay_dest)
1995         list_remove(&surface->overlay_entry);
1996
1997     HeapFree(GetProcessHeap(), 0, surface->palette9);
1998
1999     resource_cleanup(&surface->resource);
2000 }
2001
2002 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
2003 {
2004     struct wined3d_palette *palette = surface->palette;
2005
2006     TRACE("surface %p.\n", surface);
2007
2008     if (!palette) return;
2009
2010     if (surface->flags & SFLAG_DIBSECTION)
2011     {
2012         RGBQUAD col[256];
2013         unsigned int i;
2014
2015         TRACE("Updating the DC's palette.\n");
2016
2017         for (i = 0; i < 256; ++i)
2018         {
2019             col[i].rgbRed = palette->palents[i].peRed;
2020             col[i].rgbGreen = palette->palents[i].peGreen;
2021             col[i].rgbBlue = palette->palents[i].peBlue;
2022             col[i].rgbReserved = 0;
2023         }
2024         SetDIBColorTable(surface->hDC, 0, 256, col);
2025     }
2026
2027     /* Update the image because of the palette change. Some games like e.g.
2028      * Red Alert call SetEntries a lot to implement fading. */
2029     /* Tell the swapchain to update the screen. */
2030     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2031     {
2032         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2033         if (surface == swapchain->front_buffer)
2034         {
2035             x11_copy_to_screen(swapchain, NULL);
2036         }
2037     }
2038 }
2039
2040 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
2041 {
2042     FIXME("GDI surfaces can't draw overlays yet.\n");
2043     return E_FAIL;
2044 }
2045
/* Preloading is not supported for GDI surfaces; this only logs an error. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);

    ERR("Preloading GDI surfaces is not supported.\n");
}
2052
2053 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2054 {
2055     TRACE("surface %p, rect %s, flags %#x.\n",
2056             surface, wine_dbgstr_rect(rect), flags);
2057
2058     if (!surface->resource.allocatedMemory)
2059     {
2060         /* This happens on gdi surfaces if the application set a user pointer
2061          * and resets it. Recreate the DIB section. */
2062         surface_create_dib_section(surface);
2063         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2064     }
2065 }
2066
2067 static void gdi_surface_unmap(struct wined3d_surface *surface)
2068 {
2069     TRACE("surface %p.\n", surface);
2070
2071     /* Tell the swapchain to update the screen. */
2072     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2073     {
2074         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2075         if (surface == swapchain->front_buffer)
2076         {
2077             x11_copy_to_screen(swapchain, &surface->lockedRect);
2078         }
2079     }
2080
2081     memset(&surface->lockedRect, 0, sizeof(RECT));
2082 }
2083
2084 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
2085 {
2086     WINED3DLOCKED_RECT lock;
2087     HRESULT hr;
2088
2089     TRACE("surface %p.\n", surface);
2090
2091     /* Should have a DIB section already. */
2092     if (!(surface->flags & SFLAG_DIBSECTION))
2093     {
2094         WARN("DC not supported on this surface\n");
2095         return WINED3DERR_INVALIDCALL;
2096     }
2097
2098     /* Map the surface. */
2099     hr = wined3d_surface_map(surface, &lock, NULL, 0);
2100     if (FAILED(hr))
2101         ERR("Map failed, hr %#x.\n", hr);
2102
2103     return hr;
2104 }
2105
2106 static HRESULT gdi_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
2107 {
2108     TRACE("surface %p, override %p.\n", surface, override);
2109
2110     return WINED3D_OK;
2111 }
2112
2113 static HRESULT gdi_surface_set_mem(struct wined3d_surface *surface, void *mem)
2114 {
2115     TRACE("surface %p, mem %p.\n", surface, mem);
2116
2117     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
2118     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2119     {
2120         ERR("Not supported on render targets.\n");
2121         return WINED3DERR_INVALIDCALL;
2122     }
2123
2124     if (mem && mem != surface->resource.allocatedMemory)
2125     {
2126         void *release = NULL;
2127
2128         /* Do I have to copy the old surface content? */
2129         if (surface->flags & SFLAG_DIBSECTION)
2130         {
2131             SelectObject(surface->hDC, surface->dib.holdbitmap);
2132             DeleteDC(surface->hDC);
2133             /* Release the DIB section. */
2134             DeleteObject(surface->dib.DIBsection);
2135             surface->dib.bitmap_data = NULL;
2136             surface->resource.allocatedMemory = NULL;
2137             surface->hDC = NULL;
2138             surface->flags &= ~SFLAG_DIBSECTION;
2139         }
2140         else if (!(surface->flags & SFLAG_USERPTR))
2141         {
2142             release = surface->resource.allocatedMemory;
2143         }
2144         surface->resource.allocatedMemory = mem;
2145         surface->flags |= SFLAG_USERPTR | SFLAG_INSYSMEM;
2146
2147         /* Now free the old memory, if any. */
2148         HeapFree(GetProcessHeap(), 0, release);
2149     }
2150     else if (surface->flags & SFLAG_USERPTR)
2151     {
2152         /* Map() and GetDC() will re-create the dib section and allocated memory. */
2153         surface->resource.allocatedMemory = NULL;
2154         surface->flags &= ~SFLAG_USERPTR;
2155     }
2156
2157     return WINED3D_OK;
2158 }
2159
2160 static const struct wined3d_surface_ops gdi_surface_ops =
2161 {
2162     gdi_surface_private_setup,
2163     surface_gdi_cleanup,
2164     gdi_surface_realize_palette,
2165     gdi_surface_draw_overlay,
2166     gdi_surface_preload,
2167     gdi_surface_map,
2168     gdi_surface_unmap,
2169     gdi_surface_getdc,
2170     gdi_surface_flip,
2171     gdi_surface_set_mem,
2172 };
2173
2174 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2175 {
2176     GLuint *name;
2177     DWORD flag;
2178
2179     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2180
2181     if(srgb)
2182     {
2183         name = &surface->texture_name_srgb;
2184         flag = SFLAG_INSRGBTEX;
2185     }
2186     else
2187     {
2188         name = &surface->texture_name;
2189         flag = SFLAG_INTEXTURE;
2190     }
2191
2192     if (!*name && new_name)
2193     {
2194         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2195          * surface has no texture name yet. See if we can get rid of this. */
2196         if (surface->flags & flag)
2197             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2198         surface_modify_location(surface, flag, FALSE);
2199     }
2200
2201     *name = new_name;
2202     surface_force_reload(surface);
2203 }
2204
2205 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2206 {
2207     TRACE("surface %p, target %#x.\n", surface, target);
2208
2209     if (surface->texture_target != target)
2210     {
2211         if (target == GL_TEXTURE_RECTANGLE_ARB)
2212         {
2213             surface->flags &= ~SFLAG_NORMCOORD;
2214         }
2215         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2216         {
2217             surface->flags |= SFLAG_NORMCOORD;
2218         }
2219     }
2220     surface->texture_target = target;
2221     surface_force_reload(surface);
2222 }
2223
2224 /* Context activation is done by the caller. */
2225 void surface_bind(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
2226 {
2227     TRACE("surface %p, gl_info %p, srgb %#x.\n", surface, gl_info, srgb);
2228
2229     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2230     {
2231         struct wined3d_texture *texture = surface->container.u.texture;
2232
2233         TRACE("Passing to container (%p).\n", texture);
2234         texture->texture_ops->texture_bind(texture, gl_info, srgb);
2235     }
2236     else
2237     {
2238         if (surface->texture_level)
2239         {
2240             ERR("Standalone surface %p is non-zero texture level %u.\n",
2241                     surface, surface->texture_level);
2242         }
2243
2244         if (srgb)
2245             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2246
2247         ENTER_GL();
2248
2249         if (!surface->texture_name)
2250         {
2251             glGenTextures(1, &surface->texture_name);
2252             checkGLcall("glGenTextures");
2253
2254             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2255
2256             glBindTexture(surface->texture_target, surface->texture_name);
2257             checkGLcall("glBindTexture");
2258             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2259             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2260             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2261             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2262             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2263             checkGLcall("glTexParameteri");
2264         }
2265         else
2266         {
2267             glBindTexture(surface->texture_target, surface->texture_name);
2268             checkGLcall("glBindTexture");
2269         }
2270
2271         LEAVE_GL();
2272     }
2273 }
2274
2275 /* This call just downloads data, the caller is responsible for binding the
2276  * correct texture. */
2277 /* Context activation is done by the caller. */
2278 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2279 {
2280     const struct wined3d_format *format = surface->resource.format;
2281
2282     /* Only support read back of converted P8 surfaces. */
2283     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2284     {
2285         FIXME("Readback conversion not supported for format %s.\n", debug_d3dformat(format->id));
2286         return;
2287     }
2288
2289     ENTER_GL();
2290
2291     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2292     {
2293         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2294                 surface, surface->texture_level, format->glFormat, format->glType,
2295                 surface->resource.allocatedMemory);
2296
2297         if (surface->flags & SFLAG_PBO)
2298         {
2299             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2300             checkGLcall("glBindBufferARB");
2301             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2302             checkGLcall("glGetCompressedTexImageARB");
2303             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2304             checkGLcall("glBindBufferARB");
2305         }
2306         else
2307         {
2308             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2309                     surface->texture_level, surface->resource.allocatedMemory));
2310             checkGLcall("glGetCompressedTexImageARB");
2311         }
2312
2313         LEAVE_GL();
2314     }
2315     else
2316     {
2317         void *mem;
2318         GLenum gl_format = format->glFormat;
2319         GLenum gl_type = format->glType;
2320         int src_pitch = 0;
2321         int dst_pitch = 0;
2322
2323         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2324         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2325         {
2326             gl_format = GL_ALPHA;
2327             gl_type = GL_UNSIGNED_BYTE;
2328         }
2329
2330         if (surface->flags & SFLAG_NONPOW2)
2331         {
2332             unsigned char alignment = surface->resource.device->surface_alignment;
2333             src_pitch = format->byte_count * surface->pow2Width;
2334             dst_pitch = wined3d_surface_get_pitch(surface);
2335             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2336             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2337         }
2338         else
2339         {
2340             mem = surface->resource.allocatedMemory;
2341         }
2342
2343         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2344                 surface, surface->texture_level, gl_format, gl_type, mem);
2345
2346         if (surface->flags & SFLAG_PBO)
2347         {
2348             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2349             checkGLcall("glBindBufferARB");
2350
2351             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2352             checkGLcall("glGetTexImage");
2353
2354             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2355             checkGLcall("glBindBufferARB");
2356         }
2357         else
2358         {
2359             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2360             checkGLcall("glGetTexImage");
2361         }
2362         LEAVE_GL();
2363
2364         if (surface->flags & SFLAG_NONPOW2)
2365         {
2366             const BYTE *src_data;
2367             BYTE *dst_data;
2368             UINT y;
2369             /*
2370              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2371              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2372              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2373              *
2374              * We're doing this...
2375              *
2376              * instead of boxing the texture :
2377              * |<-texture width ->|  -->pow2width|   /\
2378              * |111111111111111111|              |   |
2379              * |222 Texture 222222| boxed empty  | texture height
2380              * |3333 Data 33333333|              |   |
2381              * |444444444444444444|              |   \/
2382              * -----------------------------------   |
2383              * |     boxed  empty | boxed empty  | pow2height
2384              * |                  |              |   \/
2385              * -----------------------------------
2386              *
2387              *
2388              * we're repacking the data to the expected texture width
2389              *
2390              * |<-texture width ->|  -->pow2width|   /\
2391              * |111111111111111111222222222222222|   |
2392              * |222333333333333333333444444444444| texture height
2393              * |444444                           |   |
2394              * |                                 |   \/
2395              * |                                 |   |
2396              * |            empty                | pow2height
2397              * |                                 |   \/
2398              * -----------------------------------
2399              *
2400              * == is the same as
2401              *
2402              * |<-texture width ->|    /\
2403              * |111111111111111111|
2404              * |222222222222222222|texture height
2405              * |333333333333333333|
2406              * |444444444444444444|    \/
2407              * --------------------
2408              *
2409              * this also means that any references to allocatedMemory should work with the data as if were a
2410              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2411              *
2412              * internally the texture is still stored in a boxed format so any references to textureName will
2413              * get a boxed texture with width pow2width and not a texture of width resource.width.
2414              *
2415              * Performance should not be an issue, because applications normally do not lock the surfaces when
2416              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2417              * and doesn't have to be re-read. */
2418             src_data = mem;
2419             dst_data = surface->resource.allocatedMemory;
2420             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2421             for (y = 1; y < surface->resource.height; ++y)
2422             {
2423                 /* skip the first row */
2424                 src_data += src_pitch;
2425                 dst_data += dst_pitch;
2426                 memcpy(dst_data, src_data, dst_pitch);
2427             }
2428
2429             HeapFree(GetProcessHeap(), 0, mem);
2430         }
2431     }
2432
2433     /* Surface has now been downloaded */
2434     surface->flags |= SFLAG_INSYSMEM;
2435 }
2436
2437 /* This call just uploads data, the caller is responsible for binding the
2438  * correct texture. */
2439 /* Context activation is done by the caller. */
2440 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2441         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2442         BOOL srgb, const struct wined3d_bo_address *data)
2443 {
2444     UINT update_w = src_rect->right - src_rect->left;
2445     UINT update_h = src_rect->bottom - src_rect->top;
2446
2447     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2448             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2449             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2450
2451     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2452         update_h *= format->heightscale;
2453
2454     ENTER_GL();
2455
2456     if (data->buffer_object)
2457     {
2458         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2459         checkGLcall("glBindBufferARB");
2460     }
2461
2462     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2463     {
2464         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2465         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2466         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2467         const BYTE *addr = data->addr;
2468         GLenum internal;
2469
2470         addr += (src_rect->top / format->block_height) * src_pitch;
2471         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2472
2473         if (srgb)
2474             internal = format->glGammaInternal;
2475         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2476             internal = format->rtInternal;
2477         else
2478             internal = format->glInternal;
2479
2480         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2481                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2482                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2483
2484         if (row_length == src_pitch)
2485         {
2486             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2487                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2488         }
2489         else
2490         {
2491             UINT row, y;
2492
2493             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2494              * can't use the unpack row length like below. */
2495             for (row = 0, y = dst_point->y; row < row_count; ++row)
2496             {
2497                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2498                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2499                 y += format->block_height;
2500                 addr += src_pitch;
2501             }
2502         }
2503         checkGLcall("glCompressedTexSubImage2DARB");
2504     }
2505     else
2506     {
2507         const BYTE *addr = data->addr;
2508
2509         addr += src_rect->top * src_w * format->byte_count;
2510         addr += src_rect->left * format->byte_count;
2511
2512         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2513                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2514                 update_w, update_h, format->glFormat, format->glType, addr);
2515
2516         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2517         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2518                 update_w, update_h, format->glFormat, format->glType, addr);
2519         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2520         checkGLcall("glTexSubImage2D");
2521     }
2522
2523     if (data->buffer_object)
2524     {
2525         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2526         checkGLcall("glBindBufferARB");
2527     }
2528
2529     LEAVE_GL();
2530
2531     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2532     {
2533         struct wined3d_device *device = surface->resource.device;
2534         unsigned int i;
2535
2536         for (i = 0; i < device->context_count; ++i)
2537         {
2538             context_surface_update(device->contexts[i], surface);
2539         }
2540     }
2541 }
2542
2543 /* This call just allocates the texture, the caller is responsible for binding
2544  * the correct texture. */
2545 /* Context activation is done by the caller. */
2546 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2547         const struct wined3d_format *format, BOOL srgb)
2548 {
2549     BOOL enable_client_storage = FALSE;
2550     GLsizei width = surface->pow2Width;
2551     GLsizei height = surface->pow2Height;
2552     const BYTE *mem = NULL;
2553     GLenum internal;
2554
2555     if (srgb)
2556     {
2557         internal = format->glGammaInternal;
2558     }
2559     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2560     {
2561         internal = format->rtInternal;
2562     }
2563     else
2564     {
2565         internal = format->glInternal;
2566     }
2567
2568     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2569
2570     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2571             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2572             internal, width, height, format->glFormat, format->glType);
2573
2574     ENTER_GL();
2575
2576     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2577     {
2578         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2579                 || !surface->resource.allocatedMemory)
2580         {
2581             /* In some cases we want to disable client storage.
2582              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2583              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2584              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2585              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2586              */
2587             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2588             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2589             surface->flags &= ~SFLAG_CLIENT;
2590             enable_client_storage = TRUE;
2591         }
2592         else
2593         {
2594             surface->flags |= SFLAG_CLIENT;
2595
2596             /* Point OpenGL to our allocated texture memory. Do not use
2597              * resource.allocatedMemory here because it might point into a
2598              * PBO. Instead use heapMemory, but get the alignment right. */
2599             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2600                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2601         }
2602     }
2603
2604     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2605     {
2606         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2607                 internal, width, height, 0, surface->resource.size, mem));
2608         checkGLcall("glCompressedTexImage2DARB");
2609     }
2610     else
2611     {
2612         glTexImage2D(surface->texture_target, surface->texture_level,
2613                 internal, width, height, 0, format->glFormat, format->glType, mem);
2614         checkGLcall("glTexImage2D");
2615     }
2616
2617     if(enable_client_storage) {
2618         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2619         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2620     }
2621     LEAVE_GL();
2622 }
2623
2624 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2625  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2626 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2627 /* GL locking is done by the caller */
2628 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2629 {
2630     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2631     struct wined3d_renderbuffer_entry *entry;
2632     GLuint renderbuffer = 0;
2633     unsigned int src_width, src_height;
2634     unsigned int width, height;
2635
2636     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2637     {
2638         width = rt->pow2Width;
2639         height = rt->pow2Height;
2640     }
2641     else
2642     {
2643         width = surface->pow2Width;
2644         height = surface->pow2Height;
2645     }
2646
2647     src_width = surface->pow2Width;
2648     src_height = surface->pow2Height;
2649
2650     /* A depth stencil smaller than the render target is not valid */
2651     if (width > src_width || height > src_height) return;
2652
2653     /* Remove any renderbuffer set if the sizes match */
2654     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2655             || (width == src_width && height == src_height))
2656     {
2657         surface->current_renderbuffer = NULL;
2658         return;
2659     }
2660
2661     /* Look if we've already got a renderbuffer of the correct dimensions */
2662     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2663     {
2664         if (entry->width == width && entry->height == height)
2665         {
2666             renderbuffer = entry->id;
2667             surface->current_renderbuffer = entry;
2668             break;
2669         }
2670     }
2671
2672     if (!renderbuffer)
2673     {
2674         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2675         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2676         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2677                 surface->resource.format->glInternal, width, height);
2678
2679         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2680         entry->width = width;
2681         entry->height = height;
2682         entry->id = renderbuffer;
2683         list_add_head(&surface->renderbuffers, &entry->entry);
2684
2685         surface->current_renderbuffer = entry;
2686     }
2687
2688     checkGLcall("set_compatible_renderbuffer");
2689 }
2690
2691 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2692 {
2693     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2694
2695     TRACE("surface %p.\n", surface);
2696
2697     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2698     {
2699         ERR("Surface %p is not on a swapchain.\n", surface);
2700         return GL_NONE;
2701     }
2702
2703     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2704     {
2705         if (swapchain->render_to_fbo)
2706         {
2707             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2708             return GL_COLOR_ATTACHMENT0;
2709         }
2710         TRACE("Returning GL_BACK\n");
2711         return GL_BACK;
2712     }
2713     else if (surface == swapchain->front_buffer)
2714     {
2715         TRACE("Returning GL_FRONT\n");
2716         return GL_FRONT;
2717     }
2718
2719     FIXME("Higher back buffer, returning GL_BACK\n");
2720     return GL_BACK;
2721 }
2722
2723 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2724 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2725 {
2726     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2727
2728     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2729         /* No partial locking for textures yet. */
2730         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2731
2732     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2733     if (dirty_rect)
2734     {
2735         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2736         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2737         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2738         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2739     }
2740     else
2741     {
2742         surface->dirtyRect.left = 0;
2743         surface->dirtyRect.top = 0;
2744         surface->dirtyRect.right = surface->resource.width;
2745         surface->dirtyRect.bottom = surface->resource.height;
2746     }
2747
2748     /* if the container is a texture then mark it dirty. */
2749     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2750     {
2751         TRACE("Passing to container.\n");
2752         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2753     }
2754 }
2755
2756 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2757 {
2758     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2759     BOOL ck_changed;
2760
2761     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2762
2763     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2764     {
2765         ERR("Not supported on scratch surfaces.\n");
2766         return WINED3DERR_INVALIDCALL;
2767     }
2768
2769     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2770
2771     /* Reload if either the texture and sysmem have different ideas about the
2772      * color key, or the actual key values changed. */
2773     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2774             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2775             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2776     {
2777         TRACE("Reloading because of color keying\n");
2778         /* To perform the color key conversion we need a sysmem copy of
2779          * the surface. Make sure we have it. */
2780
2781         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2782         /* Make sure the texture is reloaded because of the color key change,
2783          * this kills performance though :( */
2784         /* TODO: This is not necessarily needed with hw palettized texture support. */
2785         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2786         /* Switching color keying on / off may change the internal format. */
2787         if (ck_changed)
2788             surface_force_reload(surface);
2789     }
2790     else if (!(surface->flags & flag))
2791     {
2792         TRACE("Reloading because surface is dirty.\n");
2793     }
2794     else
2795     {
2796         TRACE("surface is already in texture\n");
2797         return WINED3D_OK;
2798     }
2799
2800     /* No partial locking for textures yet. */
2801     surface_load_location(surface, flag, NULL);
2802     surface_evict_sysmem(surface);
2803
2804     return WINED3D_OK;
2805 }
2806
/* Convert a 32 bit float to the bit pattern of a 16 bit (half precision)
 * float. See also float_16_to_32() in wined3d_private.h.
 *
 * in: pointer to the value to convert.
 *
 * Returns 0x0000 for zero, 0x7c01 for NaN, and 0x7c00 / 0xfc00 for positive /
 * negative infinity as well as for values too large for a half float. Values
 * too small for a normalized half float are returned as denormals, and
 * eventually as zero. */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), so that its integer part is the 11
     * bit mantissa including the implicit leading bit. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        --exp;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        ++exp;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding may carry out of the 11 bit mantissa. Renormalize in that
     * case, instead of silently dropping the carry when masking below. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2869
2870 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2871 {
2872     ULONG refcount;
2873
2874     TRACE("Surface %p, container %p of type %#x.\n",
2875             surface, surface->container.u.base, surface->container.type);
2876
2877     switch (surface->container.type)
2878     {
2879         case WINED3D_CONTAINER_TEXTURE:
2880             return wined3d_texture_incref(surface->container.u.texture);
2881
2882         case WINED3D_CONTAINER_SWAPCHAIN:
2883             return wined3d_swapchain_incref(surface->container.u.swapchain);
2884
2885         default:
2886             ERR("Unhandled container type %#x.\n", surface->container.type);
2887         case WINED3D_CONTAINER_NONE:
2888             break;
2889     }
2890
2891     refcount = InterlockedIncrement(&surface->resource.ref);
2892     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2893
2894     return refcount;
2895 }
2896
2897 /* Do not call while under the GL lock. */
2898 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2899 {
2900     ULONG refcount;
2901
2902     TRACE("Surface %p, container %p of type %#x.\n",
2903             surface, surface->container.u.base, surface->container.type);
2904
2905     switch (surface->container.type)
2906     {
2907         case WINED3D_CONTAINER_TEXTURE:
2908             return wined3d_texture_decref(surface->container.u.texture);
2909
2910         case WINED3D_CONTAINER_SWAPCHAIN:
2911             return wined3d_swapchain_decref(surface->container.u.swapchain);
2912
2913         default:
2914             ERR("Unhandled container type %#x.\n", surface->container.type);
2915         case WINED3D_CONTAINER_NONE:
2916             break;
2917     }
2918
2919     refcount = InterlockedDecrement(&surface->resource.ref);
2920     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2921
2922     if (!refcount)
2923     {
2924         surface->surface_ops->surface_cleanup(surface);
2925         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2926
2927         TRACE("Destroyed surface %p.\n", surface);
2928         HeapFree(GetProcessHeap(), 0, surface);
2929     }
2930
2931     return refcount;
2932 }
2933
2934 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2935 {
2936     return resource_set_priority(&surface->resource, priority);
2937 }
2938
2939 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2940 {
2941     return resource_get_priority(&surface->resource);
2942 }
2943
2944 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2945 {
2946     TRACE("surface %p.\n", surface);
2947
2948     surface->surface_ops->surface_preload(surface);
2949 }
2950
2951 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2952 {
2953     TRACE("surface %p.\n", surface);
2954
2955     return surface->resource.parent;
2956 }
2957
2958 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2959 {
2960     TRACE("surface %p.\n", surface);
2961
2962     return &surface->resource;
2963 }
2964
2965 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2966 {
2967     TRACE("surface %p, flags %#x.\n", surface, flags);
2968
2969     switch (flags)
2970     {
2971         case WINEDDGBS_CANBLT:
2972         case WINEDDGBS_ISBLTDONE:
2973             return WINED3D_OK;
2974
2975         default:
2976             return WINED3DERR_INVALIDCALL;
2977     }
2978 }
2979
2980 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2981 {
2982     TRACE("surface %p, flags %#x.\n", surface, flags);
2983
2984     /* XXX: DDERR_INVALIDSURFACETYPE */
2985
2986     switch (flags)
2987     {
2988         case WINEDDGFS_CANFLIP:
2989         case WINEDDGFS_ISFLIPDONE:
2990             return WINED3D_OK;
2991
2992         default:
2993             return WINED3DERR_INVALIDCALL;
2994     }
2995 }
2996
2997 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2998 {
2999     TRACE("surface %p.\n", surface);
3000
3001     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
3002     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
3003 }
3004
3005 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
3006 {
3007     TRACE("surface %p.\n", surface);
3008
3009     /* So far we don't lose anything :) */
3010     surface->flags &= ~SFLAG_LOST;
3011     return WINED3D_OK;
3012 }
3013
3014 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
3015 {
3016     TRACE("surface %p, palette %p.\n", surface, palette);
3017
3018     if (surface->palette == palette)
3019     {
3020         TRACE("Nop palette change.\n");
3021         return WINED3D_OK;
3022     }
3023
3024     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3025         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3026
3027     surface->palette = palette;
3028
3029     if (palette)
3030     {
3031         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3032             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3033
3034         surface->surface_ops->surface_realize_palette(surface);
3035     }
3036
3037     return WINED3D_OK;
3038 }
3039
3040 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3041         DWORD flags, const WINEDDCOLORKEY *color_key)
3042 {
3043     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3044
3045     if (flags & WINEDDCKEY_COLORSPACE)
3046     {
3047         FIXME(" colorkey value not supported (%08x) !\n", flags);
3048         return WINED3DERR_INVALIDCALL;
3049     }
3050
3051     /* Dirtify the surface, but only if a key was changed. */
3052     if (color_key)
3053     {
3054         switch (flags & ~WINEDDCKEY_COLORSPACE)
3055         {
3056             case WINEDDCKEY_DESTBLT:
3057                 surface->DestBltCKey = *color_key;
3058                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3059                 break;
3060
3061             case WINEDDCKEY_DESTOVERLAY:
3062                 surface->DestOverlayCKey = *color_key;
3063                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3064                 break;
3065
3066             case WINEDDCKEY_SRCOVERLAY:
3067                 surface->SrcOverlayCKey = *color_key;
3068                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3069                 break;
3070
3071             case WINEDDCKEY_SRCBLT:
3072                 surface->SrcBltCKey = *color_key;
3073                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3074                 break;
3075         }
3076     }
3077     else
3078     {
3079         switch (flags & ~WINEDDCKEY_COLORSPACE)
3080         {
3081             case WINEDDCKEY_DESTBLT:
3082                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3083                 break;
3084
3085             case WINEDDCKEY_DESTOVERLAY:
3086                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3087                 break;
3088
3089             case WINEDDCKEY_SRCOVERLAY:
3090                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3091                 break;
3092
3093             case WINEDDCKEY_SRCBLT:
3094                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3095                 break;
3096         }
3097     }
3098
3099     return WINED3D_OK;
3100 }
3101
3102 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3103 {
3104     TRACE("surface %p.\n", surface);
3105
3106     return surface->palette;
3107 }
3108
3109 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3110 {
3111     const struct wined3d_format *format = surface->resource.format;
3112     DWORD pitch;
3113
3114     TRACE("surface %p.\n", surface);
3115
3116     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3117     {
3118         /* Since compressed formats are block based, pitch means the amount of
3119          * bytes to the next row of block rather than the next row of pixels. */
3120         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3121         pitch = row_block_count * format->block_byte_count;
3122     }
3123     else
3124     {
3125         unsigned char alignment = surface->resource.device->surface_alignment;
3126         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3127         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3128     }
3129
3130     TRACE("Returning %u.\n", pitch);
3131
3132     return pitch;
3133 }
3134
3135 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3136 {
3137     TRACE("surface %p, mem %p.\n", surface, mem);
3138
3139     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3140     {
3141         WARN("Surface is locked or the DC is in use.\n");
3142         return WINED3DERR_INVALIDCALL;
3143     }
3144
3145     return surface->surface_ops->surface_set_mem(surface, mem);
3146 }
3147
3148 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3149 {
3150     LONG w, h;
3151
3152     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3153
3154     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3155     {
3156         WARN("Not an overlay surface.\n");
3157         return WINEDDERR_NOTAOVERLAYSURFACE;
3158     }
3159
3160     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3161     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3162     surface->overlay_destrect.left = x;
3163     surface->overlay_destrect.top = y;
3164     surface->overlay_destrect.right = x + w;
3165     surface->overlay_destrect.bottom = y + h;
3166
3167     surface->surface_ops->surface_draw_overlay(surface);
3168
3169     return WINED3D_OK;
3170 }
3171
3172 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3173 {
3174     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3175
3176     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3177     {
3178         TRACE("Not an overlay surface.\n");
3179         return WINEDDERR_NOTAOVERLAYSURFACE;
3180     }
3181
3182     if (!surface->overlay_dest)
3183     {
3184         TRACE("Overlay not visible.\n");
3185         *x = 0;
3186         *y = 0;
3187         return WINEDDERR_OVERLAYNOTVISIBLE;
3188     }
3189
3190     *x = surface->overlay_destrect.left;
3191     *y = surface->overlay_destrect.top;
3192
3193     TRACE("Returning position %d, %d.\n", *x, *y);
3194
3195     return WINED3D_OK;
3196 }
3197
3198 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3199         DWORD flags, struct wined3d_surface *ref)
3200 {
3201     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3202
3203     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3204     {
3205         TRACE("Not an overlay surface.\n");
3206         return WINEDDERR_NOTAOVERLAYSURFACE;
3207     }
3208
3209     return WINED3D_OK;
3210 }
3211
3212 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3213         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3214 {
3215     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3216             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3217
3218     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3219     {
3220         WARN("Not an overlay surface.\n");
3221         return WINEDDERR_NOTAOVERLAYSURFACE;
3222     }
3223     else if (!dst_surface)
3224     {
3225         WARN("Dest surface is NULL.\n");
3226         return WINED3DERR_INVALIDCALL;
3227     }
3228
3229     if (src_rect)
3230     {
3231         surface->overlay_srcrect = *src_rect;
3232     }
3233     else
3234     {
3235         surface->overlay_srcrect.left = 0;
3236         surface->overlay_srcrect.top = 0;
3237         surface->overlay_srcrect.right = surface->resource.width;
3238         surface->overlay_srcrect.bottom = surface->resource.height;
3239     }
3240
3241     if (dst_rect)
3242     {
3243         surface->overlay_destrect = *dst_rect;
3244     }
3245     else
3246     {
3247         surface->overlay_destrect.left = 0;
3248         surface->overlay_destrect.top = 0;
3249         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3250         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3251     }
3252
3253     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3254     {
3255         list_remove(&surface->overlay_entry);
3256     }
3257
3258     if (flags & WINEDDOVER_SHOW)
3259     {
3260         if (surface->overlay_dest != dst_surface)
3261         {
3262             surface->overlay_dest = dst_surface;
3263             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3264         }
3265     }
3266     else if (flags & WINEDDOVER_HIDE)
3267     {
3268         /* tests show that the rectangles are erased on hide */
3269         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3270         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3271         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3272         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3273         surface->overlay_dest = NULL;
3274     }
3275
3276     surface->surface_ops->surface_draw_overlay(surface);
3277
3278     return WINED3D_OK;
3279 }
3280
3281 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3282 {
3283     TRACE("surface %p, clipper %p.\n", surface, clipper);
3284
3285     surface->clipper = clipper;
3286
3287     return WINED3D_OK;
3288 }
3289
3290 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3291 {
3292     TRACE("surface %p.\n", surface);
3293
3294     return surface->clipper;
3295 }
3296
3297 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3298 {
3299     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3300
3301     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3302
3303     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3304     {
3305         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3306         return WINED3DERR_INVALIDCALL;
3307     }
3308
3309     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3310             surface->pow2Width, surface->pow2Height);
3311     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3312     surface->resource.format = format;
3313
3314     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3315     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3316             format->glFormat, format->glInternal, format->glType);
3317
3318     return WINED3D_OK;
3319 }
3320
3321 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3322         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3323 {
3324     unsigned short *dst_s;
3325     const float *src_f;
3326     unsigned int x, y;
3327
3328     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3329
3330     for (y = 0; y < h; ++y)
3331     {
3332         src_f = (const float *)(src + y * pitch_in);
3333         dst_s = (unsigned short *) (dst + y * pitch_out);
3334         for (x = 0; x < w; ++x)
3335         {
3336             dst_s[x] = float_32_to_16(src_f + x);
3337         }
3338     }
3339 }
3340
3341 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3342         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3343 {
3344     static const unsigned char convert_5to8[] =
3345     {
3346         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3347         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3348         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3349         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3350     };
3351     static const unsigned char convert_6to8[] =
3352     {
3353         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3354         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3355         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3356         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3357         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3358         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3359         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3360         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3361     };
3362     unsigned int x, y;
3363
3364     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3365
3366     for (y = 0; y < h; ++y)
3367     {
3368         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3369         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3370         for (x = 0; x < w; ++x)
3371         {
3372             WORD pixel = src_line[x];
3373             dst_line[x] = 0xff000000
3374                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3375                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3376                     | convert_5to8[(pixel & 0x001f)];
3377         }
3378     }
3379 }
3380
3381 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3382  * in both cases we're just setting the X / Alpha channel to 0xff. */
3383 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3384         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3385 {
3386     unsigned int x, y;
3387
3388     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3389
3390     for (y = 0; y < h; ++y)
3391     {
3392         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3393         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3394
3395         for (x = 0; x < w; ++x)
3396         {
3397             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3398         }
3399     }
3400 }
3401
3402 static inline BYTE cliptobyte(int x)
3403 {
3404     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3405 }
3406
3407 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3408         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3409 {
3410     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3411     unsigned int x, y;
3412
3413     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3414
3415     for (y = 0; y < h; ++y)
3416     {
3417         const BYTE *src_line = src + y * pitch_in;
3418         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3419         for (x = 0; x < w; ++x)
3420         {
3421             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3422              *     C = Y - 16; D = U - 128; E = V - 128;
3423              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3424              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3425              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3426              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3427              * U and V are shared between the pixels. */
3428             if (!(x & 1)) /* For every even pixel, read new U and V. */
3429             {
3430                 d = (int) src_line[1] - 128;
3431                 e = (int) src_line[3] - 128;
3432                 r2 = 409 * e + 128;
3433                 g2 = - 100 * d - 208 * e + 128;
3434                 b2 = 516 * d + 128;
3435             }
3436             c2 = 298 * ((int) src_line[0] - 16);
3437             dst_line[x] = 0xff000000
3438                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3439                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3440                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3441                 /* Scale RGB values to 0..255 range,
3442                  * then clip them if still not in range (may be negative),
3443                  * then shift them within DWORD if necessary. */
3444             src_line += 2;
3445         }
3446     }
3447 }
3448
3449 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3450         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3451 {
3452     unsigned int x, y;
3453     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3454
3455     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3456
3457     for (y = 0; y < h; ++y)
3458     {
3459         const BYTE *src_line = src + y * pitch_in;
3460         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3461         for (x = 0; x < w; ++x)
3462         {
3463             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3464              *     C = Y - 16; D = U - 128; E = V - 128;
3465              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3466              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3467              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3468              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3469              * U and V are shared between the pixels. */
3470             if (!(x & 1)) /* For every even pixel, read new U and V. */
3471             {
3472                 d = (int) src_line[1] - 128;
3473                 e = (int) src_line[3] - 128;
3474                 r2 = 409 * e + 128;
3475                 g2 = - 100 * d - 208 * e + 128;
3476                 b2 = 516 * d + 128;
3477             }
3478             c2 = 298 * ((int) src_line[0] - 16);
3479             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3480                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3481                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3482                 /* Scale RGB values to 0..255 range,
3483                  * then clip them if still not in range (may be negative),
3484                  * then shift them within DWORD if necessary. */
3485             src_line += 2;
3486         }
3487     }
3488 }
3489
3490 struct d3dfmt_convertor_desc
3491 {
3492     enum wined3d_format_id from, to;
3493     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3494 };
3495
3496 static const struct d3dfmt_convertor_desc convertors[] =
3497 {
3498     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3499     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3500     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3501     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3502     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3503     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3504 };
3505
3506 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3507         enum wined3d_format_id to)
3508 {
3509     unsigned int i;
3510
3511     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3512     {
3513         if (convertors[i].from == from && convertors[i].to == to)
3514             return &convertors[i];
3515     }
3516
3517     return NULL;
3518 }
3519
3520 /*****************************************************************************
3521  * surface_convert_format
3522  *
3523  * Creates a duplicate of a surface in a different format. Is used by Blt to
3524  * blit between surfaces with different formats.
3525  *
3526  * Parameters
3527  *  source: Source surface
3528  *  fmt: Requested destination format
3529  *
3530  *****************************************************************************/
3531 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3532 {
3533     const struct d3dfmt_convertor_desc *conv;
3534     WINED3DLOCKED_RECT lock_src, lock_dst;
3535     struct wined3d_surface *ret = NULL;
3536     HRESULT hr;
3537
3538     conv = find_convertor(source->resource.format->id, to_fmt);
3539     if (!conv)
3540     {
3541         FIXME("Cannot find a conversion function from format %s to %s.\n",
3542                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3543         return NULL;
3544     }
3545
3546     wined3d_surface_create(source->resource.device, source->resource.width,
3547             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3548             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3549             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3550     if (!ret)
3551     {
3552         ERR("Failed to create a destination surface for conversion.\n");
3553         return NULL;
3554     }
3555
3556     memset(&lock_src, 0, sizeof(lock_src));
3557     memset(&lock_dst, 0, sizeof(lock_dst));
3558
3559     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3560     if (FAILED(hr))
3561     {
3562         ERR("Failed to lock the source surface.\n");
3563         wined3d_surface_decref(ret);
3564         return NULL;
3565     }
3566     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3567     if (FAILED(hr))
3568     {
3569         ERR("Failed to lock the destination surface.\n");
3570         wined3d_surface_unmap(source);
3571         wined3d_surface_decref(ret);
3572         return NULL;
3573     }
3574
3575     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3576             source->resource.width, source->resource.height);
3577
3578     wined3d_surface_unmap(ret);
3579     wined3d_surface_unmap(source);
3580
3581     return ret;
3582 }
3583
3584 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3585         unsigned int bpp, UINT pitch, DWORD color)
3586 {
3587     BYTE *first;
3588     int x, y;
3589
3590     /* Do first row */
3591
3592 #define COLORFILL_ROW(type) \
3593 do { \
3594     type *d = (type *)buf; \
3595     for (x = 0; x < width; ++x) \
3596         d[x] = (type)color; \
3597 } while(0)
3598
3599     switch (bpp)
3600     {
3601         case 1:
3602             COLORFILL_ROW(BYTE);
3603             break;
3604
3605         case 2:
3606             COLORFILL_ROW(WORD);
3607             break;
3608
3609         case 3:
3610         {
3611             BYTE *d = buf;
3612             for (x = 0; x < width; ++x, d += 3)
3613             {
3614                 d[0] = (color      ) & 0xFF;
3615                 d[1] = (color >>  8) & 0xFF;
3616                 d[2] = (color >> 16) & 0xFF;
3617             }
3618             break;
3619         }
3620         case 4:
3621             COLORFILL_ROW(DWORD);
3622             break;
3623
3624         default:
3625             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3626             return WINED3DERR_NOTAVAILABLE;
3627     }
3628
3629 #undef COLORFILL_ROW
3630
3631     /* Now copy first row. */
3632     first = buf;
3633     for (y = 1; y < height; ++y)
3634     {
3635         buf += pitch;
3636         memcpy(buf, first, width * bpp);
3637     }
3638
3639     return WINED3D_OK;
3640 }
3641
3642 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3643 {
3644     TRACE("surface %p.\n", surface);
3645
3646     if (!(surface->flags & SFLAG_LOCKED))
3647     {
3648         WARN("Trying to unmap unmapped surface.\n");
3649         return WINEDDERR_NOTLOCKED;
3650     }
3651     surface->flags &= ~SFLAG_LOCKED;
3652
3653     surface->surface_ops->surface_unmap(surface);
3654
3655     return WINED3D_OK;
3656 }
3657
3658 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3659         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3660 {
3661     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3662             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3663
3664     if (surface->flags & SFLAG_LOCKED)
3665     {
3666         WARN("Surface is already mapped.\n");
3667         return WINED3DERR_INVALIDCALL;
3668     }
3669     surface->flags |= SFLAG_LOCKED;
3670
3671     if (!(surface->flags & SFLAG_LOCKABLE))
3672         WARN("Trying to lock unlockable surface.\n");
3673
3674     surface->surface_ops->surface_map(surface, rect, flags);
3675
3676     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3677
3678     if (!rect)
3679     {
3680         locked_rect->pBits = surface->resource.allocatedMemory;
3681         surface->lockedRect.left = 0;
3682         surface->lockedRect.top = 0;
3683         surface->lockedRect.right = surface->resource.width;
3684         surface->lockedRect.bottom = surface->resource.height;
3685     }
3686     else
3687     {
3688         const struct wined3d_format *format = surface->resource.format;
3689
3690         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3691         {
3692             /* Compressed textures are block based, so calculate the offset of
3693              * the block that contains the top-left pixel of the locked rectangle. */
3694             locked_rect->pBits = surface->resource.allocatedMemory
3695                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3696                     + ((rect->left / format->block_width) * format->block_byte_count);
3697         }
3698         else
3699         {
3700             locked_rect->pBits = surface->resource.allocatedMemory
3701                     + (locked_rect->Pitch * rect->top)
3702                     + (rect->left * format->byte_count);
3703         }
3704         surface->lockedRect.left = rect->left;
3705         surface->lockedRect.top = rect->top;
3706         surface->lockedRect.right = rect->right;
3707         surface->lockedRect.bottom = rect->bottom;
3708     }
3709
3710     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3711     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3712
3713     return WINED3D_OK;
3714 }
3715
3716 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3717 {
3718     HRESULT hr;
3719
3720     TRACE("surface %p, dc %p.\n", surface, dc);
3721
3722     if (surface->flags & SFLAG_USERPTR)
3723     {
3724         ERR("Not supported on surfaces with application-provided memory.\n");
3725         return WINEDDERR_NODC;
3726     }
3727
3728     /* Give more detailed info for ddraw. */
3729     if (surface->flags & SFLAG_DCINUSE)
3730         return WINEDDERR_DCALREADYCREATED;
3731
3732     /* Can't GetDC if the surface is locked. */
3733     if (surface->flags & SFLAG_LOCKED)
3734         return WINED3DERR_INVALIDCALL;
3735
3736     hr = surface->surface_ops->surface_getdc(surface);
3737     if (FAILED(hr))
3738         return hr;
3739
3740     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3741             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3742     {
3743         /* GetDC on palettized formats is unsupported in D3D9, and the method
3744          * is missing in D3D8, so this should only be used for DX <=7
3745          * surfaces (with non-device palettes). */
3746         const PALETTEENTRY *pal = NULL;
3747
3748         if (surface->palette)
3749         {
3750             pal = surface->palette->palents;
3751         }
3752         else
3753         {
3754             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3755             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3756
3757             if (dds_primary && dds_primary->palette)
3758                 pal = dds_primary->palette->palents;
3759         }
3760
3761         if (pal)
3762         {
3763             RGBQUAD col[256];
3764             unsigned int i;
3765
3766             for (i = 0; i < 256; ++i)
3767             {
3768                 col[i].rgbRed = pal[i].peRed;
3769                 col[i].rgbGreen = pal[i].peGreen;
3770                 col[i].rgbBlue = pal[i].peBlue;
3771                 col[i].rgbReserved = 0;
3772             }
3773             SetDIBColorTable(surface->hDC, 0, 256, col);
3774         }
3775     }
3776
3777     surface->flags |= SFLAG_DCINUSE;
3778
3779     *dc = surface->hDC;
3780     TRACE("Returning dc %p.\n", *dc);
3781
3782     return WINED3D_OK;
3783 }
3784
3785 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3786 {
3787     TRACE("surface %p, dc %p.\n", surface, dc);
3788
3789     if (!(surface->flags & SFLAG_DCINUSE))
3790         return WINEDDERR_NODC;
3791
3792     if (surface->hDC != dc)
3793     {
3794         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3795                 dc, surface->hDC);
3796         return WINEDDERR_NODC;
3797     }
3798
3799     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3800     {
3801         /* Copy the contents of the DIB over to the PBO. */
3802         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
3803     }
3804
3805     /* We locked first, so unlock now. */
3806     wined3d_surface_unmap(surface);
3807
3808     surface->flags &= ~SFLAG_DCINUSE;
3809
3810     return WINED3D_OK;
3811 }
3812
3813 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3814 {
3815     struct wined3d_swapchain *swapchain;
3816     HRESULT hr;
3817
3818     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3819
3820     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
3821     {
3822         ERR("Flipped surface is not on a swapchain.\n");
3823         return WINEDDERR_NOTFLIPPABLE;
3824     }
3825     swapchain = surface->container.u.swapchain;
3826
3827     hr = surface->surface_ops->surface_flip(surface, override);
3828     if (FAILED(hr))
3829         return hr;
3830
3831     /* Just overwrite the swapchain presentation interval. This is ok because
3832      * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
3833      * specify the presentation interval. */
3834     if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
3835         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
3836     else if (flags & WINEDDFLIP_NOVSYNC)
3837         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
3838     else if (flags & WINEDDFLIP_INTERVAL2)
3839         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
3840     else if (flags & WINEDDFLIP_INTERVAL3)
3841         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
3842     else
3843         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
3844
3845     return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
3846 }
3847
3848 /* Do not call while under the GL lock. */
3849 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3850 {
3851     struct wined3d_device *device = surface->resource.device;
3852
3853     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3854
3855     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3856     {
3857         struct wined3d_texture *texture = surface->container.u.texture;
3858
3859         TRACE("Passing to container (%p).\n", texture);
3860         texture->texture_ops->texture_preload(texture, srgb);
3861     }
3862     else
3863     {
3864         struct wined3d_context *context = NULL;
3865
3866         TRACE("(%p) : About to load surface\n", surface);
3867
3868         if (!device->isInDraw) context = context_acquire(device, NULL);
3869
3870         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3871                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3872         {
3873             if (palette9_changed(surface))
3874             {
3875                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3876                 /* TODO: This is not necessarily needed with hw palettized texture support */
3877                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3878                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3879                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3880             }
3881         }
3882
3883         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3884
3885         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3886         {
3887             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3888             GLclampf tmp;
3889             tmp = 0.9f;
3890             ENTER_GL();
3891             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3892             LEAVE_GL();
3893         }
3894
3895         if (context) context_release(context);
3896     }
3897 }
3898
3899 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3900 {
3901     if (!surface->resource.allocatedMemory)
3902     {
3903         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3904                 surface->resource.size + RESOURCE_ALIGNMENT);
3905         if (!surface->resource.heapMemory)
3906         {
3907             ERR("Out of memory\n");
3908             return FALSE;
3909         }
3910         surface->resource.allocatedMemory =
3911             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3912     }
3913     else
3914     {
3915         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3916     }
3917
3918     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3919
3920     return TRUE;
3921 }
3922
3923 /* Read the framebuffer back into the surface */
3924 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3925 {
3926     struct wined3d_device *device = surface->resource.device;
3927     const struct wined3d_gl_info *gl_info;
3928     struct wined3d_context *context;
3929     BYTE *mem;
3930     GLint fmt;
3931     GLint type;
3932     BYTE *row, *top, *bottom;
3933     int i;
3934     BOOL bpp;
3935     RECT local_rect;
3936     BOOL srcIsUpsideDown;
3937     GLint rowLen = 0;
3938     GLint skipPix = 0;
3939     GLint skipRow = 0;
3940
3941     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3942         static BOOL warned = FALSE;
3943         if(!warned) {
3944             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3945             warned = TRUE;
3946         }
3947         return;
3948     }
3949
3950     context = context_acquire(device, surface);
3951     context_apply_blit_state(context, device);
3952     gl_info = context->gl_info;
3953
3954     ENTER_GL();
3955
3956     /* Select the correct read buffer, and give some debug output.
3957      * There is no need to keep track of the current read buffer or reset it, every part of the code
3958      * that reads sets the read buffer as desired.
3959      */
3960     if (surface_is_offscreen(surface))
3961     {
3962         /* Mapping the primary render target which is not on a swapchain.
3963          * Read from the back buffer. */
3964         TRACE("Mapping offscreen render target.\n");
3965         glReadBuffer(device->offscreenBuffer);
3966         srcIsUpsideDown = TRUE;
3967     }
3968     else
3969     {
3970         /* Onscreen surfaces are always part of a swapchain */
3971         GLenum buffer = surface_get_gl_buffer(surface);
3972         TRACE("Mapping %#x buffer.\n", buffer);
3973         glReadBuffer(buffer);
3974         checkGLcall("glReadBuffer");
3975         srcIsUpsideDown = FALSE;
3976     }
3977
3978     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3979     if (!rect)
3980     {
3981         local_rect.left = 0;
3982         local_rect.top = 0;
3983         local_rect.right = surface->resource.width;
3984         local_rect.bottom = surface->resource.height;
3985     }
3986     else
3987     {
3988         local_rect = *rect;
3989     }
3990     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3991
3992     switch (surface->resource.format->id)
3993     {
3994         case WINED3DFMT_P8_UINT:
3995         {
3996             if (primary_render_target_is_p8(device))
3997             {
3998                 /* In case of P8 render targets the index is stored in the alpha component */
3999                 fmt = GL_ALPHA;
4000                 type = GL_UNSIGNED_BYTE;
4001                 mem = dest;
4002                 bpp = surface->resource.format->byte_count;
4003             }
4004             else
4005             {
4006                 /* GL can't return palettized data, so read ARGB pixels into a
4007                  * separate block of memory and convert them into palettized format
4008                  * in software. Slow, but if the app means to use palettized render
4009                  * targets and locks it...
4010                  *
4011                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4012                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4013                  * for the color channels when palettizing the colors.
4014                  */
4015                 fmt = GL_RGB;
4016                 type = GL_UNSIGNED_BYTE;
4017                 pitch *= 3;
4018                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4019                 if (!mem)
4020                 {
4021                     ERR("Out of memory\n");
4022                     LEAVE_GL();
4023                     return;
4024                 }
4025                 bpp = surface->resource.format->byte_count * 3;
4026             }
4027         }
4028         break;
4029
4030         default:
4031             mem = dest;
4032             fmt = surface->resource.format->glFormat;
4033             type = surface->resource.format->glType;
4034             bpp = surface->resource.format->byte_count;
4035     }
4036
4037     if (surface->flags & SFLAG_PBO)
4038     {
4039         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4040         checkGLcall("glBindBufferARB");
4041         if (mem)
4042         {
4043             ERR("mem not null for pbo -- unexpected\n");
4044             mem = NULL;
4045         }
4046     }
4047
4048     /* Save old pixel store pack state */
4049     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4050     checkGLcall("glGetIntegerv");
4051     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4052     checkGLcall("glGetIntegerv");
4053     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4054     checkGLcall("glGetIntegerv");
4055
4056     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4057     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4058     checkGLcall("glPixelStorei");
4059     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4060     checkGLcall("glPixelStorei");
4061     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4062     checkGLcall("glPixelStorei");
4063
4064     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4065             local_rect.right - local_rect.left,
4066             local_rect.bottom - local_rect.top,
4067             fmt, type, mem);
4068     checkGLcall("glReadPixels");
4069
4070     /* Reset previous pixel store pack state */
4071     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4072     checkGLcall("glPixelStorei");
4073     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4074     checkGLcall("glPixelStorei");
4075     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4076     checkGLcall("glPixelStorei");
4077
4078     if (surface->flags & SFLAG_PBO)
4079     {
4080         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4081         checkGLcall("glBindBufferARB");
4082
4083         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4084          * to get a pointer to it and perform the flipping in software. This is a lot
4085          * faster than calling glReadPixels for each line. In case we want more speed
4086          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4087         if (!srcIsUpsideDown)
4088         {
4089             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4090             checkGLcall("glBindBufferARB");
4091
4092             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4093             checkGLcall("glMapBufferARB");
4094         }
4095     }
4096
4097     /* TODO: Merge this with the palettization loop below for P8 targets */
4098     if(!srcIsUpsideDown) {
4099         UINT len, off;
4100         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4101             Flip the lines in software */
4102         len = (local_rect.right - local_rect.left) * bpp;
4103         off = local_rect.left * bpp;
4104
4105         row = HeapAlloc(GetProcessHeap(), 0, len);
4106         if(!row) {
4107             ERR("Out of memory\n");
4108             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4109                 HeapFree(GetProcessHeap(), 0, mem);
4110             LEAVE_GL();
4111             return;
4112         }
4113
4114         top = mem + pitch * local_rect.top;
4115         bottom = mem + pitch * (local_rect.bottom - 1);
4116         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4117             memcpy(row, top + off, len);
4118             memcpy(top + off, bottom + off, len);
4119             memcpy(bottom + off, row, len);
4120             top += pitch;
4121             bottom -= pitch;
4122         }
4123         HeapFree(GetProcessHeap(), 0, row);
4124
4125         /* Unmap the temp PBO buffer */
4126         if (surface->flags & SFLAG_PBO)
4127         {
4128             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4129             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4130         }
4131     }
4132
4133     LEAVE_GL();
4134     context_release(context);
4135
4136     /* For P8 textures we need to perform an inverse palette lookup. This is
4137      * done by searching for a palette index which matches the RGB value.
4138      * Note this isn't guaranteed to work when there are multiple entries for
4139      * the same color but we have no choice. In case of P8 render targets,
4140      * the index is stored in the alpha component so no conversion is needed. */
4141     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4142     {
4143         const PALETTEENTRY *pal = NULL;
4144         DWORD width = pitch / 3;
4145         int x, y, c;
4146
4147         if (surface->palette)
4148         {
4149             pal = surface->palette->palents;
4150         }
4151         else
4152         {
4153             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4154             HeapFree(GetProcessHeap(), 0, mem);
4155             return;
4156         }
4157
4158         for(y = local_rect.top; y < local_rect.bottom; y++) {
4159             for(x = local_rect.left; x < local_rect.right; x++) {
4160                 /*                      start              lines            pixels      */
4161                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4162                 const BYTE *green = blue  + 1;
4163                 const BYTE *red = green + 1;
4164
4165                 for(c = 0; c < 256; c++) {
4166                     if(*red   == pal[c].peRed   &&
4167                        *green == pal[c].peGreen &&
4168                        *blue  == pal[c].peBlue)
4169                     {
4170                         *((BYTE *) dest + y * width + x) = c;
4171                         break;
4172                     }
4173                 }
4174             }
4175         }
4176         HeapFree(GetProcessHeap(), 0, mem);
4177     }
4178 }
4179
4180 /* Read the framebuffer contents into a texture */
4181 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
4182 {
4183     struct wined3d_device *device = surface->resource.device;
4184     const struct wined3d_gl_info *gl_info;
4185     struct wined3d_context *context;
4186
4187     if (!surface_is_offscreen(surface))
4188     {
4189         /* We would need to flip onscreen surfaces, but there's no efficient
4190          * way to do that here. It makes more sense for the caller to
4191          * explicitly go through sysmem. */
4192         ERR("Not supported for onscreen targets.\n");
4193         return;
4194     }
4195
4196     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4197      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4198      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4199      */
4200     context = context_acquire(device, surface);
4201     gl_info = context->gl_info;
4202     device_invalidate_state(device, STATE_FRAMEBUFFER);
4203
4204     surface_prepare_texture(surface, gl_info, srgb);
4205     surface_bind_and_dirtify(surface, gl_info, srgb);
4206
4207     TRACE("Reading back offscreen render target %p.\n", surface);
4208
4209     ENTER_GL();
4210
4211     glReadBuffer(device->offscreenBuffer);
4212     checkGLcall("glReadBuffer");
4213
4214     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4215             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4216     checkGLcall("glCopyTexSubImage2D");
4217
4218     LEAVE_GL();
4219
4220     context_release(context);
4221 }
4222
4223 /* Context activation is done by the caller. */
4224 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4225         const struct wined3d_gl_info *gl_info, BOOL srgb)
4226 {
4227     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4228     CONVERT_TYPES convert;
4229     struct wined3d_format format;
4230
4231     if (surface->flags & alloc_flag) return;
4232
4233     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4234     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4235     else surface->flags &= ~SFLAG_CONVERTED;
4236
4237     surface_bind_and_dirtify(surface, gl_info, srgb);
4238     surface_allocate_surface(surface, gl_info, &format, srgb);
4239     surface->flags |= alloc_flag;
4240 }
4241
4242 /* Context activation is done by the caller. */
4243 void surface_prepare_texture(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
4244 {
4245     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4246     {
4247         struct wined3d_texture *texture = surface->container.u.texture;
4248         UINT sub_count = texture->level_count * texture->layer_count;
4249         UINT i;
4250
4251         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4252
4253         for (i = 0; i < sub_count; ++i)
4254         {
4255             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4256             surface_prepare_texture_internal(s, gl_info, srgb);
4257         }
4258
4259         return;
4260     }
4261
4262     surface_prepare_texture_internal(surface, gl_info, srgb);
4263 }
4264
4265 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4266         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4267 {
4268     struct wined3d_device *device = surface->resource.device;
4269     UINT pitch = wined3d_surface_get_pitch(surface);
4270     const struct wined3d_gl_info *gl_info;
4271     struct wined3d_context *context;
4272     RECT local_rect;
4273     UINT w, h;
4274
4275     surface_get_rect(surface, rect, &local_rect);
4276
4277     mem += local_rect.top * pitch + local_rect.left * bpp;
4278     w = local_rect.right - local_rect.left;
4279     h = local_rect.bottom - local_rect.top;
4280
4281     /* Activate the correct context for the render target */
4282     context = context_acquire(device, surface);
4283     context_apply_blit_state(context, device);
4284     gl_info = context->gl_info;
4285
4286     ENTER_GL();
4287
4288     if (!surface_is_offscreen(surface))
4289     {
4290         GLenum buffer = surface_get_gl_buffer(surface);
4291         TRACE("Unlocking %#x buffer.\n", buffer);
4292         context_set_draw_buffer(context, buffer);
4293
4294         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4295         glPixelZoom(1.0f, -1.0f);
4296     }
4297     else
4298     {
4299         /* Primary offscreen render target */
4300         TRACE("Offscreen render target.\n");
4301         context_set_draw_buffer(context, device->offscreenBuffer);
4302
4303         glPixelZoom(1.0f, 1.0f);
4304     }
4305
4306     glRasterPos3i(local_rect.left, local_rect.top, 1);
4307     checkGLcall("glRasterPos3i");
4308
4309     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4310     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4311
4312     if (surface->flags & SFLAG_PBO)
4313     {
4314         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4315         checkGLcall("glBindBufferARB");
4316     }
4317
4318     glDrawPixels(w, h, fmt, type, mem);
4319     checkGLcall("glDrawPixels");
4320
4321     if (surface->flags & SFLAG_PBO)
4322     {
4323         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4324         checkGLcall("glBindBufferARB");
4325     }
4326
4327     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4328     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4329
4330     LEAVE_GL();
4331
4332     if (wined3d_settings.strict_draw_ordering
4333             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4334             && surface->container.u.swapchain->front_buffer == surface))
4335         wglFlush();
4336
4337     context_release(context);
4338 }
4339
4340 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4341         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4342 {
4343     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4344     const struct wined3d_device *device = surface->resource.device;
4345     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4346     BOOL blit_supported = FALSE;
4347
4348     /* Copy the default values from the surface. Below we might perform fixups */
4349     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4350     *format = *surface->resource.format;
4351     *convert = NO_CONVERSION;
4352
4353     /* Ok, now look if we have to do any conversion */
4354     switch (surface->resource.format->id)
4355     {
4356         case WINED3DFMT_P8_UINT:
4357             /* Below the call to blit_supported is disabled for Wine 1.2
4358              * because the function isn't operating correctly yet. At the
4359              * moment 8-bit blits are handled in software and if certain GL
4360              * extensions are around, surface conversion is performed at
4361              * upload time. The blit_supported call recognizes it as a
4362              * destination fixup. This type of upload 'fixup' and 8-bit to
4363              * 8-bit blits need to be handled by the blit_shader.
4364              * TODO: get rid of this #if 0. */
4365 #if 0
4366             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4367                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4368                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4369 #endif
4370             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4371
4372             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4373              * texturing. Further also use conversion in case of color keying.
4374              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4375              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4376              * conflicts with this.
4377              */
4378             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4379                     || colorkey_active || !use_texturing)
4380             {
4381                 format->glFormat = GL_RGBA;
4382                 format->glInternal = GL_RGBA;
4383                 format->glType = GL_UNSIGNED_BYTE;
4384                 format->conv_byte_count = 4;
4385                 if (colorkey_active)
4386                     *convert = CONVERT_PALETTED_CK;
4387                 else
4388                     *convert = CONVERT_PALETTED;
4389             }
4390             break;
4391
4392         case WINED3DFMT_B2G3R3_UNORM:
4393             /* **********************
4394                 GL_UNSIGNED_BYTE_3_3_2
4395                 ********************** */
4396             if (colorkey_active) {
4397                 /* This texture format will never be used.. So do not care about color keying
4398                     up until the point in time it will be needed :-) */
4399                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4400             }
4401             break;
4402
4403         case WINED3DFMT_B5G6R5_UNORM:
4404             if (colorkey_active)
4405             {
4406                 *convert = CONVERT_CK_565;
4407                 format->glFormat = GL_RGBA;
4408                 format->glInternal = GL_RGB5_A1;
4409                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4410                 format->conv_byte_count = 2;
4411             }
4412             break;
4413
4414         case WINED3DFMT_B5G5R5X1_UNORM:
4415             if (colorkey_active)
4416             {
4417                 *convert = CONVERT_CK_5551;
4418                 format->glFormat = GL_BGRA;
4419                 format->glInternal = GL_RGB5_A1;
4420                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4421                 format->conv_byte_count = 2;
4422             }
4423             break;
4424
4425         case WINED3DFMT_B8G8R8_UNORM:
4426             if (colorkey_active)
4427             {
4428                 *convert = CONVERT_CK_RGB24;
4429                 format->glFormat = GL_RGBA;
4430                 format->glInternal = GL_RGBA8;
4431                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4432                 format->conv_byte_count = 4;
4433             }
4434             break;
4435
4436         case WINED3DFMT_B8G8R8X8_UNORM:
4437             if (colorkey_active)
4438             {
4439                 *convert = CONVERT_RGB32_888;
4440                 format->glFormat = GL_RGBA;
4441                 format->glInternal = GL_RGBA8;
4442                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4443                 format->conv_byte_count = 4;
4444             }
4445             break;
4446
4447         default:
4448             break;
4449     }
4450
4451     return WINED3D_OK;
4452 }
4453
4454 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4455 {
4456     const struct wined3d_device *device = surface->resource.device;
4457     const struct wined3d_palette *pal = surface->palette;
4458     BOOL index_in_alpha = FALSE;
4459     unsigned int i;
4460
4461     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4462      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4463      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4464      * duplicate entries. Store the color key in the unused alpha component to speed the
4465      * download up and to make conversion unneeded. */
4466     index_in_alpha = primary_render_target_is_p8(device);
4467
4468     if (!pal)
4469     {
4470         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4471         if (device->wined3d->flags & WINED3D_PALETTE_PER_SURFACE)
4472         {
4473             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4474             if (index_in_alpha)
4475             {
4476                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4477                  * there's no palette at this time. */
4478                 for (i = 0; i < 256; i++) table[i][3] = i;
4479             }
4480         }
4481         else
4482         {
4483             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4484              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4485              * capability flag is present (wine does advertise this capability) */
4486             for (i = 0; i < 256; ++i)
4487             {
4488                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4489                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4490                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4491                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4492             }
4493         }
4494     }
4495     else
4496     {
4497         TRACE("Using surface palette %p\n", pal);
4498         /* Get the surface's palette */
4499         for (i = 0; i < 256; ++i)
4500         {
4501             table[i][0] = pal->palents[i].peRed;
4502             table[i][1] = pal->palents[i].peGreen;
4503             table[i][2] = pal->palents[i].peBlue;
4504
4505             /* When index_in_alpha is set the palette index is stored in the
4506              * alpha component. In case of a readback we can then read
4507              * GL_ALPHA. Color keying is handled in BltOverride using a
4508              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4509              * color key itself is passed to glAlphaFunc in other cases the
4510              * alpha component of pixels that should be masked away is set to 0. */
4511             if (index_in_alpha)
4512             {
4513                 table[i][3] = i;
4514             }
4515             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4516                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4517             {
4518                 table[i][3] = 0x00;
4519             }
4520             else if (pal->flags & WINEDDPCAPS_ALPHA)
4521             {
4522                 table[i][3] = pal->palents[i].peFlags;
4523             }
4524             else
4525             {
4526                 table[i][3] = 0xFF;
4527             }
4528         }
4529     }
4530 }
4531
4532 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4533         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4534 {
4535     const BYTE *source;
4536     BYTE *dest;
4537     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4538
4539     switch (convert) {
4540         case NO_CONVERSION:
4541         {
4542             memcpy(dst, src, pitch * height);
4543             break;
4544         }
4545         case CONVERT_PALETTED:
4546         case CONVERT_PALETTED_CK:
4547         {
4548             BYTE table[256][4];
4549             unsigned int x, y;
4550
4551             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4552
4553             for (y = 0; y < height; y++)
4554             {
4555                 source = src + pitch * y;
4556                 dest = dst + outpitch * y;
4557                 /* This is an 1 bpp format, using the width here is fine */
4558                 for (x = 0; x < width; x++) {
4559                     BYTE color = *source++;
4560                     *dest++ = table[color][0];
4561                     *dest++ = table[color][1];
4562                     *dest++ = table[color][2];
4563                     *dest++ = table[color][3];
4564                 }
4565             }
4566         }
4567         break;
4568
4569         case CONVERT_CK_565:
4570         {
4571             /* Converting the 565 format in 5551 packed to emulate color-keying.
4572
4573               Note : in all these conversion, it would be best to average the averaging
4574                       pixels to get the color of the pixel that will be color-keyed to
4575                       prevent 'color bleeding'. This will be done later on if ever it is
4576                       too visible.
4577
4578               Note2: Nvidia documents say that their driver does not support alpha + color keying
4579                      on the same surface and disables color keying in such a case
4580             */
4581             unsigned int x, y;
4582             const WORD *Source;
4583             WORD *Dest;
4584
4585             TRACE("Color keyed 565\n");
4586
4587             for (y = 0; y < height; y++) {
4588                 Source = (const WORD *)(src + y * pitch);
4589                 Dest = (WORD *) (dst + y * outpitch);
4590                 for (x = 0; x < width; x++ ) {
4591                     WORD color = *Source++;
4592                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4593                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4594                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4595                         *Dest |= 0x0001;
4596                     Dest++;
4597                 }
4598             }
4599         }
4600         break;
4601
4602         case CONVERT_CK_5551:
4603         {
4604             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4605             unsigned int x, y;
4606             const WORD *Source;
4607             WORD *Dest;
4608             TRACE("Color keyed 5551\n");
4609             for (y = 0; y < height; y++) {
4610                 Source = (const WORD *)(src + y * pitch);
4611                 Dest = (WORD *) (dst + y * outpitch);
4612                 for (x = 0; x < width; x++ ) {
4613                     WORD color = *Source++;
4614                     *Dest = color;
4615                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4616                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4617                         *Dest |= (1 << 15);
4618                     else
4619                         *Dest &= ~(1 << 15);
4620                     Dest++;
4621                 }
4622             }
4623         }
4624         break;
4625
4626         case CONVERT_CK_RGB24:
4627         {
4628             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4629             unsigned int x, y;
4630             for (y = 0; y < height; y++)
4631             {
4632                 source = src + pitch * y;
4633                 dest = dst + outpitch * y;
4634                 for (x = 0; x < width; x++) {
4635                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4636                     DWORD dstcolor = color << 8;
4637                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4638                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4639                         dstcolor |= 0xff;
4640                     *(DWORD*)dest = dstcolor;
4641                     source += 3;
4642                     dest += 4;
4643                 }
4644             }
4645         }
4646         break;
4647
4648         case CONVERT_RGB32_888:
4649         {
4650             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4651             unsigned int x, y;
4652             for (y = 0; y < height; y++)
4653             {
4654                 source = src + pitch * y;
4655                 dest = dst + outpitch * y;
4656                 for (x = 0; x < width; x++) {
4657                     DWORD color = 0xffffff & *(const DWORD*)source;
4658                     DWORD dstcolor = color << 8;
4659                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4660                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4661                         dstcolor |= 0xff;
4662                     *(DWORD*)dest = dstcolor;
4663                     source += 4;
4664                     dest += 4;
4665                 }
4666             }
4667         }
4668         break;
4669
4670         default:
4671             ERR("Unsupported conversion type %#x.\n", convert);
4672     }
4673     return WINED3D_OK;
4674 }
4675
4676 BOOL palette9_changed(struct wined3d_surface *surface)
4677 {
4678     struct wined3d_device *device = surface->resource.device;
4679
4680     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4681             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4682     {
4683         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4684          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4685          */
4686         return FALSE;
4687     }
4688
4689     if (surface->palette9)
4690     {
4691         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4692         {
4693             return FALSE;
4694         }
4695     }
4696     else
4697     {
4698         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4699     }
4700     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4701
4702     return TRUE;
4703 }
4704
4705 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4706 {
4707     /* Flip the surface contents */
4708     /* Flip the DC */
4709     {
4710         HDC tmp;
4711         tmp = front->hDC;
4712         front->hDC = back->hDC;
4713         back->hDC = tmp;
4714     }
4715
4716     /* Flip the DIBsection */
4717     {
4718         HBITMAP tmp;
4719         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4720         tmp = front->dib.DIBsection;
4721         front->dib.DIBsection = back->dib.DIBsection;
4722         back->dib.DIBsection = tmp;
4723
4724         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4725         else front->flags &= ~SFLAG_DIBSECTION;
4726         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4727         else back->flags &= ~SFLAG_DIBSECTION;
4728     }
4729
4730     /* Flip the surface data */
4731     {
4732         void* tmp;
4733
4734         tmp = front->dib.bitmap_data;
4735         front->dib.bitmap_data = back->dib.bitmap_data;
4736         back->dib.bitmap_data = tmp;
4737
4738         tmp = front->resource.allocatedMemory;
4739         front->resource.allocatedMemory = back->resource.allocatedMemory;
4740         back->resource.allocatedMemory = tmp;
4741
4742         tmp = front->resource.heapMemory;
4743         front->resource.heapMemory = back->resource.heapMemory;
4744         back->resource.heapMemory = tmp;
4745     }
4746
4747     /* Flip the PBO */
4748     {
4749         GLuint tmp_pbo = front->pbo;
4750         front->pbo = back->pbo;
4751         back->pbo = tmp_pbo;
4752     }
4753
4754     /* client_memory should not be different, but just in case */
4755     {
4756         BOOL tmp;
4757         tmp = front->dib.client_memory;
4758         front->dib.client_memory = back->dib.client_memory;
4759         back->dib.client_memory = tmp;
4760     }
4761
4762     /* Flip the opengl texture */
4763     {
4764         GLuint tmp;
4765
4766         tmp = back->texture_name;
4767         back->texture_name = front->texture_name;
4768         front->texture_name = tmp;
4769
4770         tmp = back->texture_name_srgb;
4771         back->texture_name_srgb = front->texture_name_srgb;
4772         front->texture_name_srgb = tmp;
4773
4774         resource_unload(&back->resource);
4775         resource_unload(&front->resource);
4776     }
4777
4778     {
4779         DWORD tmp_flags = back->flags;
4780         back->flags = front->flags;
4781         front->flags = tmp_flags;
4782     }
4783 }
4784
4785 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4786  * pixel copy calls. */
4787 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4788         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4789 {
4790     struct wined3d_device *device = dst_surface->resource.device;
4791     float xrel, yrel;
4792     UINT row;
4793     struct wined3d_context *context;
4794     BOOL upsidedown = FALSE;
4795     RECT dst_rect = *dst_rect_in;
4796
4797     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4798      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4799      */
4800     if(dst_rect.top > dst_rect.bottom) {
4801         UINT tmp = dst_rect.bottom;
4802         dst_rect.bottom = dst_rect.top;
4803         dst_rect.top = tmp;
4804         upsidedown = TRUE;
4805     }
4806
4807     context = context_acquire(device, src_surface);
4808     context_apply_blit_state(context, device);
4809     surface_internal_preload(dst_surface, SRGB_RGB);
4810     ENTER_GL();
4811
4812     /* Bind the target texture */
4813     glBindTexture(dst_surface->texture_target, dst_surface->texture_name);
4814     checkGLcall("glBindTexture");
4815     if (surface_is_offscreen(src_surface))
4816     {
4817         TRACE("Reading from an offscreen target\n");
4818         upsidedown = !upsidedown;
4819         glReadBuffer(device->offscreenBuffer);
4820     }
4821     else
4822     {
4823         glReadBuffer(surface_get_gl_buffer(src_surface));
4824     }
4825     checkGLcall("glReadBuffer");
4826
4827     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4828     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4829
4830     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4831     {
4832         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4833
4834         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4835             ERR("Texture filtering not supported in direct blit\n");
4836         }
4837     }
4838     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4839             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4840     {
4841         ERR("Texture filtering not supported in direct blit\n");
4842     }
4843
4844     if (upsidedown
4845             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4846             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4847     {
4848         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4849
4850         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4851                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4852                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4853                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4854     }
4855     else
4856     {
4857         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4858         /* I have to process this row by row to swap the image,
4859          * otherwise it would be upside down, so stretching in y direction
4860          * doesn't cost extra time
4861          *
4862          * However, stretching in x direction can be avoided if not necessary
4863          */
4864         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4865             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4866             {
4867                 /* Well, that stuff works, but it's very slow.
4868                  * find a better way instead
4869                  */
4870                 UINT col;
4871
4872                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4873                 {
4874                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4875                             dst_rect.left + col /* x offset */, row /* y offset */,
4876                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4877                 }
4878             }
4879             else
4880             {
4881                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4882                         dst_rect.left /* x offset */, row /* y offset */,
4883                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4884             }
4885         }
4886     }
4887     checkGLcall("glCopyTexSubImage2D");
4888
4889     LEAVE_GL();
4890     context_release(context);
4891
4892     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4893      * path is never entered
4894      */
4895     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4896 }
4897
4898 /* Uses the hardware to stretch and flip the image.
 *
 * The frame buffer contents are copied into a texture, that texture is drawn
 * as a quad of the destination size at the origin of a scratch draw buffer,
 * and the result is read back into the texture of dst_surface with
 * glCopyTexSubImage2D(). If the quad had to be drawn to GL_BACK, the previous
 * back buffer contents are redrawn from the copy afterwards.
 *
 * dst_surface: surface whose GL texture receives the result.
 *         surface_modify_location() is called on it with SFLAG_INTEXTURE
 *         at the end.
 * src_surface: frame buffer surface that is read from.
 * src_rect: source area, used to compute the texture coordinates of the quad.
 * dst_rect_in: destination area. top > bottom requests a vertical flip.
 * Filter: filter used to set the min / mag filter of the source texture. */
4899 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4900         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4901 {
4902     struct wined3d_device *device = dst_surface->resource.device;
4903     struct wined3d_swapchain *src_swapchain = NULL;
4904     GLuint src, backup = 0;
4905     float left, right, top, bottom; /* Texture coordinates */
4906     UINT fbwidth = src_surface->resource.width;
4907     UINT fbheight = src_surface->resource.height;
4908     struct wined3d_context *context;
4909     GLenum drawBuffer = GL_BACK;
4910     GLenum texture_target;
4911     BOOL noBackBufferBackup;
4912     BOOL src_offscreen;
4913     BOOL upsidedown = FALSE;
4914     RECT dst_rect = *dst_rect_in;
4915
4916     TRACE("Using hwstretch blit\n");
4917     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4918     context = context_acquire(device, src_surface);
4919     context_apply_blit_state(context, device);
4920     surface_internal_preload(dst_surface, SRGB_RGB);
4921
         /* With an offscreen source and FBO offscreen rendering, a temporary
          * texture is generated below instead of using the texture of the
          * source surface. */
4922     src_offscreen = surface_is_offscreen(src_surface);
4923     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4924     if (!noBackBufferBackup && !src_surface->texture_name)
4925     {
4926         /* The source surface has no GL texture name yet. Preload it,
          * presumably so that texture_name is valid when it is bound below. */
4927         surface_internal_preload(src_surface, SRGB_RGB);
4928     }
4929     ENTER_GL();
4930
4931     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4932      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4933      */
4934     if (context->aux_buffers >= 2)
4935     {
4936         /* Got more than one aux buffer? Use the 2nd aux buffer */
4937         drawBuffer = GL_AUX1;
4938     }
4939     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4940     {
4941         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4942         drawBuffer = GL_AUX0;
4943     }
4944
4945     if(noBackBufferBackup) {
4946         glGenTextures(1, &backup);
4947         checkGLcall("glGenTextures");
4948         glBindTexture(GL_TEXTURE_2D, backup);
4949         checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4950         texture_target = GL_TEXTURE_2D;
4951     } else {
4952         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4953          * we are reading from the back buffer, the backup can be used as source texture
4954          */
4955         texture_target = src_surface->texture_target;
4956         glBindTexture(texture_target, src_surface->texture_name);
4957         checkGLcall("glBindTexture(texture_target, src_surface->texture_name)");
4958         glEnable(texture_target);
4959         checkGLcall("glEnable(texture_target)");
4960
4961         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4962         src_surface->flags &= ~SFLAG_INTEXTURE;
4963     }
4964
4965     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4966      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4967      */
4968     if(dst_rect.top > dst_rect.bottom) {
4969         UINT tmp = dst_rect.bottom;
4970         dst_rect.bottom = dst_rect.top;
4971         dst_rect.top = tmp;
4972         upsidedown = TRUE;
4973     }
4974
4975     if (src_offscreen)
4976     {
4977         TRACE("Reading from an offscreen target\n");
4978         upsidedown = !upsidedown;
4979         glReadBuffer(device->offscreenBuffer);
4980     }
4981     else
4982     {
4983         glReadBuffer(surface_get_gl_buffer(src_surface));
4984     }
4985
4986     /* TODO: Only back up the part that will be overwritten */
4987     glCopyTexSubImage2D(texture_target, 0,
4988                         0, 0 /* xoffset, yoffset in the texture */,
4989                         0, 0, /* x, y read origin in the frame buffer */
4990                         fbwidth,
4991                         fbheight);
4992
4993     checkGLcall("glCopyTexSubImage2D");
4994
4995     /* No issue with overriding these - the sampler is dirty due to blit usage */
4996     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4997             wined3d_gl_mag_filter(magLookup, Filter));
4998     checkGLcall("glTexParameteri");
4999     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5000             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5001     checkGLcall("glTexParameteri");
5002
         /* Pick the texture the stretched quad is drawn from. Unless the source
          * is a swapchain surface other than back buffer 0, the copy made above
          * is used; otherwise GL_FRONT is copied into a new temporary texture. */
5003     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5004         src_swapchain = src_surface->container.u.swapchain;
5005     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5006     {
5007         src = backup ? backup : src_surface->texture_name;
5008     }
5009     else
5010     {
5011         glReadBuffer(GL_FRONT);
5012         checkGLcall("glReadBuffer(GL_FRONT)");
5013
5014         glGenTextures(1, &src);
5015         checkGLcall("glGenTextures(1, &src)");
5016         glBindTexture(GL_TEXTURE_2D, src);
5017         checkGLcall("glBindTexture(GL_TEXTURE_2D, src)");
5018
5019         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5020          * out for power of 2 sizes
5021          */
5022         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5023                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5024         checkGLcall("glTexImage2D");
5025         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5026                             0, 0 /* xoffset, yoffset in the texture */,
5027                             0, 0, /* x, y read origin in the frame buffer */
5028                             fbwidth,
5029                             fbheight);
5030
5031         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5032         checkGLcall("glTexParameteri");
5033         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5034         checkGLcall("glTexParameteri");
5035
5036         glReadBuffer(GL_BACK);
5037         checkGLcall("glReadBuffer(GL_BACK)");
5038
5039         if(texture_target != GL_TEXTURE_2D) {
5040             glDisable(texture_target);
5041             glEnable(GL_TEXTURE_2D);
5042             texture_target = GL_TEXTURE_2D;
5043         }
5044     }
5045     checkGLcall("glEnd and previous");
5046
5047     left = src_rect->left;
5048     right = src_rect->right;
5049
         /* Texture coordinates are measured from the bottom of the surface;
          * swapping top and bottom flips the quad vertically. */
5050     if (!upsidedown)
5051     {
5052         top = src_surface->resource.height - src_rect->top;
5053         bottom = src_surface->resource.height - src_rect->bottom;
5054     }
5055     else
5056     {
5057         top = src_surface->resource.height - src_rect->bottom;
5058         bottom = src_surface->resource.height - src_rect->top;
5059     }
5060
         /* Scale the texel coordinates by the pow2 texture size when the
          * surface is flagged as using normalized coordinates. */
5061     if (src_surface->flags & SFLAG_NORMCOORD)
5062     {
5063         left /= src_surface->pow2Width;
5064         right /= src_surface->pow2Width;
5065         top /= src_surface->pow2Height;
5066         bottom /= src_surface->pow2Height;
5067     }
5068
5069     /* draw the source texture stretched and upside down. The correct surface is bound already */
5070     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5071     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5072
         /* The scratch buffer is both drawn to and, further down, read back from. */
5073     context_set_draw_buffer(context, drawBuffer);
5074     glReadBuffer(drawBuffer);
5075
5076     glBegin(GL_QUADS);
5077         /* bottom left */
5078         glTexCoord2f(left, bottom);
5079         glVertex2i(0, 0);
5080
5081         /* top left */
5082         glTexCoord2f(left, top);
5083         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5084
5085         /* top right */
5086         glTexCoord2f(right, top);
5087         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5088
5089         /* bottom right */
5090         glTexCoord2f(right, bottom);
5091         glVertex2i(dst_rect.right - dst_rect.left, 0);
5092     glEnd();
5093     checkGLcall("glEnd and previous");
5094
5095     if (texture_target != dst_surface->texture_target)
5096     {
5097         glDisable(texture_target);
5098         glEnable(dst_surface->texture_target);
5099         texture_target = dst_surface->texture_target;
5100     }
5101
5102     /* Now read the stretched and upside down image into the destination texture */
5103     glBindTexture(texture_target, dst_surface->texture_name);
5104     checkGLcall("glBindTexture");
5105     glCopyTexSubImage2D(texture_target,
5106                         0,
5107                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5108                         0, 0, /* We blitted the image to the origin */
5109                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5110     checkGLcall("glCopyTexSubImage2D");
5111
5112     if(drawBuffer == GL_BACK) {
5113         /* Write the back buffer backup back */
5114         if(backup) {
5115             if(texture_target != GL_TEXTURE_2D) {
5116                 glDisable(texture_target);
5117                 glEnable(GL_TEXTURE_2D);
5118                 texture_target = GL_TEXTURE_2D;
5119             }
5120             glBindTexture(GL_TEXTURE_2D, backup);
5121             checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
5122         }
5123         else
5124         {
5125             if (texture_target != src_surface->texture_target)
5126             {
5127                 glDisable(texture_target);
5128                 glEnable(src_surface->texture_target);
5129                 texture_target = src_surface->texture_target;
5130             }
5131             glBindTexture(src_surface->texture_target, src_surface->texture_name);
5132             checkGLcall("glBindTexture(src_surface->texture_target, src_surface->texture_name)");
5133         }
5134
             /* Full frame buffer sized quad; texture coordinates are the
              * frame buffer size divided by the pow2 texture size. */
5135         glBegin(GL_QUADS);
5136             /* top left */
5137             glTexCoord2f(0.0f, 0.0f);
5138             glVertex2i(0, fbheight);
5139
5140             /* bottom left */
5141             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5142             glVertex2i(0, 0);
5143
5144             /* bottom right */
5145             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5146                     (float)fbheight / (float)src_surface->pow2Height);
5147             glVertex2i(fbwidth, 0);
5148
5149             /* top right */
5150             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5151             glVertex2i(fbwidth, fbheight);
5152         glEnd();
5153     }
5154     glDisable(texture_target);
5155     checkGLcall("glDisable(texture_target)");
5156
5157     /* Cleanup: delete the temporary textures generated above, if any */
5158     if (src != src_surface->texture_name && src != backup)
5159     {
5160         glDeleteTextures(1, &src);
5161         checkGLcall("glDeleteTextures(1, &src)");
5162     }
5163     if(backup) {
5164         glDeleteTextures(1, &backup);
5165         checkGLcall("glDeleteTextures(1, &backup)");
5166     }
5167
5168     LEAVE_GL();
5169
5170     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5171
5172     context_release(context);
5173
5174     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5175      * path is never entered
5176      */
5177     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5178 }
5179
5180 /* Front buffer coordinates are always full screen coordinates, but our GL
5181  * drawable is limited to the window's client area. The sysmem and texture
5182  * copies do have the full screen size. Note that GL has a bottom-left
5183  * origin, while D3D has a top-left origin. */
5184 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5185 {
5186     UINT drawable_height;
5187
5188     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5189             && surface == surface->container.u.swapchain->front_buffer)
5190     {
5191         POINT offset = {0, 0};
5192         RECT windowsize;
5193
5194         ScreenToClient(window, &offset);
5195         OffsetRect(rect, offset.x, offset.y);
5196
5197         GetClientRect(window, &windowsize);
5198         drawable_height = windowsize.bottom - windowsize.top;
5199     }
5200     else
5201     {
5202         drawable_height = surface->resource.height;
5203     }
5204
5205     rect->top = drawable_height - rect->top;
5206     rect->bottom = drawable_height - rect->bottom;
5207 }
5208
5209 static void surface_blt_to_drawable(struct wined3d_device *device,
5210         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5211         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5212         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5213 {
5214     struct wined3d_context *context;
5215     RECT src_rect, dst_rect;
5216
5217     src_rect = *src_rect_in;
5218     dst_rect = *dst_rect_in;
5219
5220     /* Make sure the surface is up-to-date. This should probably use
5221      * surface_load_location() and worry about the destination surface too,
5222      * unless we're overwriting it completely. */
5223     surface_internal_preload(src_surface, SRGB_RGB);
5224
5225     /* Activate the destination context, set it up for blitting */
5226     context = context_acquire(device, dst_surface);
5227     context_apply_blit_state(context, device);
5228
5229     if (!surface_is_offscreen(dst_surface))
5230         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5231
5232     device->blitter->set_shader(device->blit_priv, context->gl_info, src_surface);
5233
5234     ENTER_GL();
5235
5236     if (color_key)
5237     {
5238         glEnable(GL_ALPHA_TEST);
5239         checkGLcall("glEnable(GL_ALPHA_TEST)");
5240
5241         /* When the primary render target uses P8, the alpha component
5242          * contains the palette index. Which means that the colorkey is one of
5243          * the palette entries. In other cases pixels that should be masked
5244          * away have alpha set to 0. */
5245         if (primary_render_target_is_p8(device))
5246             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5247         else
5248             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5249         checkGLcall("glAlphaFunc");
5250     }
5251     else
5252     {
5253         glDisable(GL_ALPHA_TEST);
5254         checkGLcall("glDisable(GL_ALPHA_TEST)");
5255     }
5256
5257     draw_textured_quad(src_surface, &src_rect, &dst_rect, filter);
5258
5259     if (color_key)
5260     {
5261         glDisable(GL_ALPHA_TEST);
5262         checkGLcall("glDisable(GL_ALPHA_TEST)");
5263     }
5264
5265     LEAVE_GL();
5266
5267     /* Leave the opengl state valid for blitting */
5268     device->blitter->unset_shader(context->gl_info);
5269
5270     if (wined3d_settings.strict_draw_ordering
5271             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5272             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5273         wglFlush(); /* Flush to ensure ordering across contexts. */
5274
5275     context_release(context);
5276 }
5277
5278 /* Do not call while under the GL lock. */
5279 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5280 {
5281     struct wined3d_device *device = s->resource.device;
5282     const struct blit_shader *blitter;
5283
5284     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5285             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5286     if (!blitter)
5287     {
5288         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5289         return WINED3DERR_INVALIDCALL;
5290     }
5291
5292     return blitter->color_fill(device, s, rect, color);
5293 }
5294
5295 /* Do not call while under the GL lock. */
5296 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5297         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5298         WINED3DTEXTUREFILTERTYPE Filter)
5299 {
5300     struct wined3d_device *device = dst_surface->resource.device;
5301     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5302     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5303
5304     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5305             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5306             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5307
5308     /* Get the swapchain. One of the surfaces has to be a primary surface */
5309     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5310     {
5311         WARN("Destination is in sysmem, rejecting gl blt\n");
5312         return WINED3DERR_INVALIDCALL;
5313     }
5314
5315     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5316         dstSwapchain = dst_surface->container.u.swapchain;
5317
5318     if (src_surface)
5319     {
5320         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5321         {
5322             WARN("Src is in sysmem, rejecting gl blt\n");
5323             return WINED3DERR_INVALIDCALL;
5324         }
5325
5326         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5327             srcSwapchain = src_surface->container.u.swapchain;
5328     }
5329
5330     /* Early sort out of cases where no render target is used */
5331     if (!dstSwapchain && !srcSwapchain
5332             && src_surface != device->fb.render_targets[0]
5333             && dst_surface != device->fb.render_targets[0])
5334     {
5335         TRACE("No surface is render target, not using hardware blit.\n");
5336         return WINED3DERR_INVALIDCALL;
5337     }
5338
5339     /* No destination color keying supported */
5340     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5341     {
5342         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5343         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5344         return WINED3DERR_INVALIDCALL;
5345     }
5346
5347     /* The only case where both surfaces on a swapchain are supported is a back buffer -> front buffer blit on the same swapchain */
5348     if (dstSwapchain && dstSwapchain == srcSwapchain && dstSwapchain->back_buffers
5349             && dst_surface == dstSwapchain->front_buffer
5350             && src_surface == dstSwapchain->back_buffers[0])
5351     {
5352         /* Half-Life does a Blt from the back buffer to the front buffer,
5353          * Full surface size, no flags... Use present instead
5354          *
5355          * This path will only be entered for d3d7 and ddraw apps, because d3d8/9 offer no way to blit TO the front buffer
5356          */
5357
5358         /* Check rects - wined3d_swapchain_present() doesn't handle them. */
5359         for (;;)
5360         {
5361             TRACE("Looking if a Present can be done...\n");
5362             /* Source Rectangle must be full surface */
5363             if (src_rect->left || src_rect->top
5364                     || src_rect->right != src_surface->resource.width
5365                     || src_rect->bottom != src_surface->resource.height)
5366             {
5367                 TRACE("No, Source rectangle doesn't match\n");
5368                 break;
5369             }
5370
5371             /* No stretching may occur */
5372             if (src_rect->right != dst_rect->right - dst_rect->left
5373                     || src_rect->bottom != dst_rect->bottom - dst_rect->top)
5374             {
5375                 TRACE("No, stretching is done\n");
5376                 break;
5377             }
5378
5379             /* Destination must be full surface or match the clipping rectangle */
5380             if (dst_surface->clipper && dst_surface->clipper->hWnd)
5381             {
5382                 RECT cliprect;
5383                 POINT pos[2];
5384                 GetClientRect(dst_surface->clipper->hWnd, &cliprect);
5385                 pos[0].x = dst_rect->left;
5386                 pos[0].y = dst_rect->top;
5387                 pos[1].x = dst_rect->right;
5388                 pos[1].y = dst_rect->bottom;
5389                 MapWindowPoints(GetDesktopWindow(), dst_surface->clipper->hWnd, pos, 2);
5390
5391                 if (pos[0].x != cliprect.left || pos[0].y != cliprect.top
5392                         || pos[1].x != cliprect.right || pos[1].y != cliprect.bottom)
5393                 {
5394                     TRACE("No, dest rectangle doesn't match(clipper)\n");
5395                     TRACE("Clip rect at %s\n", wine_dbgstr_rect(&cliprect));
5396                     TRACE("Blt dest: %s\n", wine_dbgstr_rect(dst_rect));
5397                     break;
5398                 }
5399             }
5400             else if (dst_rect->left || dst_rect->top
5401                     || dst_rect->right != dst_surface->resource.width
5402                     || dst_rect->bottom != dst_surface->resource.height)
5403             {
5404                 TRACE("No, dest rectangle doesn't match(surface size)\n");
5405                 break;
5406             }
5407
5408             TRACE("Yes\n");
5409
5410             /* These flags are unimportant for the flag check, remove them */
5411             if (!(flags & ~(WINEDDBLT_DONOTWAIT | WINEDDBLT_WAIT)))
5412             {
5413                 WINED3DSWAPEFFECT orig_swap = dstSwapchain->presentParms.SwapEffect;
5414
5415                 /* The idea behind this is that a glReadPixels and a glDrawPixels call
5416                     * take very long, while a flip is fast.
5417                     * This applies to Half-Life, which does such Blts every time it finished
5418                     * a frame, and to Prince of Persia 3D, which uses this to draw at least the main
5419                     * menu. This is also used by all apps when they do windowed rendering
5420                     *
5421                     * The problem is that flipping is not really the same as copying. After a
5422                     * Blt the front buffer is a copy of the back buffer, and the back buffer is
5423                     * untouched. Therefore it's necessary to override the swap effect
5424                     * and to set it back after the flip.
5425                     *
5426                     * Windowed Direct3D < 7 apps do the same. The D3D7 sdk demos are nice
5427                     * testcases.
5428                     */
5429
5430                 dstSwapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
5431                 dstSwapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
5432
5433                 TRACE("Full screen back buffer -> front buffer blt, performing a flip instead.\n");
5434                 wined3d_swapchain_present(dstSwapchain, NULL, NULL, dstSwapchain->win_handle, NULL, 0);
5435
5436                 dstSwapchain->presentParms.SwapEffect = orig_swap;
5437
5438                 return WINED3D_OK;
5439             }
5440             break;
5441         }
5442
5443         TRACE("Unsupported blit between buffers on the same swapchain\n");
5444         return WINED3DERR_INVALIDCALL;
5445     } else if(dstSwapchain && dstSwapchain == srcSwapchain) {
5446         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5447         return WINED3DERR_INVALIDCALL;
5448     } else if(dstSwapchain && srcSwapchain) {
5449         FIXME("Implement hardware blit between two different swapchains\n");
5450         return WINED3DERR_INVALIDCALL;
5451     }
5452     else if (dstSwapchain)
5453     {
5454         /* Handled with regular texture -> swapchain blit */
5455         if (src_surface == device->fb.render_targets[0])
5456             TRACE("Blit from active render target to a swapchain\n");
5457     }
5458     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5459     {
5460         FIXME("Implement blit from a swapchain to the active render target\n");
5461         return WINED3DERR_INVALIDCALL;
5462     }
5463
5464     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5465     {
5466         /* Blit from render target to texture */
5467         BOOL stretchx;
5468
5469         /* P8 read back is not implemented */
5470         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5471                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5472         {
5473             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5474             return WINED3DERR_INVALIDCALL;
5475         }
5476
5477         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5478         {
5479             TRACE("Color keying not supported by frame buffer to texture blit\n");
5480             return WINED3DERR_INVALIDCALL;
5481             /* Destination color key is checked above */
5482         }
5483
5484         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5485             stretchx = TRUE;
5486         else
5487             stretchx = FALSE;
5488
5489         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5490          * flip the image nor scale it.
5491          *
5492          * -> If the app asks for an unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5493          * -> If the app wants an image with an unscaled width, copy it line by line
5494          * -> If the app wants an image that is scaled on the x axis, and the destination rectangle is smaller
5495          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5496          *    back buffer. This is slower than reading line per line, thus not used for flipping
5497          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5498          *    pixel by pixel. */
5499         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5500                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5501         {
5502             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5503             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5504         } else {
5505             TRACE("Using hardware stretching to flip / stretch the texture\n");
5506             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5507         }
5508
5509         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5510         {
5511             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5512             dst_surface->resource.allocatedMemory = NULL;
5513             dst_surface->resource.heapMemory = NULL;
5514         }
5515         else
5516         {
5517             dst_surface->flags &= ~SFLAG_INSYSMEM;
5518         }
5519
5520         return WINED3D_OK;
5521     }
5522     else if (src_surface)
5523     {
5524         /* Blit from offscreen surface to render target */
5525         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5526         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5527
5528         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5529
5530         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5531                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5532                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5533         {
5534             FIXME("Unsupported blit operation falling back to software\n");
5535             return WINED3DERR_INVALIDCALL;
5536         }
5537
5538         /* Color keying: Check if we have to do a color keyed blt,
5539          * and if not check if a color key is activated.
5540          *
5541          * Just modify the color keying parameters in the surface and restore them afterwards
5542          * The surface keeps track of the color key last used to load the opengl surface.
5543          * PreLoad will catch the change to the flags and color key and reload if necessary.
5544          */
5545         if (flags & WINEDDBLT_KEYSRC)
5546         {
5547             /* Use color key from surface */
5548         }
5549         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5550         {
5551             /* Use color key from DDBltFx */
5552             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5553             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5554         }
5555         else
5556         {
5557             /* Do not use color key */
5558             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5559         }
5560
5561         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5562                 src_surface, src_rect, dst_surface, dst_rect);
5563
5564         /* Restore the color key parameters */
5565         src_surface->CKeyFlags = oldCKeyFlags;
5566         src_surface->SrcBltCKey = oldBltCKey;
5567
5568         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5569
5570         return WINED3D_OK;
5571     }
5572
5573     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5574     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5575     return WINED3DERR_INVALIDCALL;
5576 }
5577
/* Draw the depth texture "texture" as a full-viewport quad into whatever
 * framebuffer is current, using the shader backend's depth blit program.
 *
 * Colour writes are masked off, depth writes are enabled and the depth test
 * is forced to GL_ALWAYS. The enable / depth / colour / viewport state touched
 * here is saved with glPushAttrib() and restored with glPopAttrib(); the
 * texture binding and the texture's compare mode are restored by hand.
 *
 * surface:    supplies the device, pow2Width / pow2Height (handed to
 *             surface_get_blt_info()) and ds_current_size (handed to the
 *             shader backend).
 * texture:    GL name of the texture to draw.
 * x, y, w, h: viewport the quad is drawn into.
 * target:     texture target of "texture", handed to surface_get_blt_info(). */
5578 /* GL locking is done by the caller */
5579 static void surface_depth_blt(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
5580         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5581 {
5582     struct wined3d_device *device = surface->resource.device;
5583     GLint compare_mode = GL_NONE;
5584     struct blt_info info;
5585     GLint old_binding = 0;
5586     RECT rect;
5587
5588     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5589
         /* Switch off everything that could discard or modify fragments, and
          * write depth only. */
5590     glDisable(GL_CULL_FACE);
5591     glDisable(GL_BLEND);
5592     glDisable(GL_ALPHA_TEST);
5593     glDisable(GL_SCISSOR_TEST);
5594     glDisable(GL_STENCIL_TEST);
5595     glEnable(GL_DEPTH_TEST);
5596     glDepthFunc(GL_ALWAYS);
5597     glDepthMask(GL_TRUE);
5598     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5599     glViewport(x, y, w, h);
5600
         /* The rect is built with top = h and bottom = 0, i.e. top and bottom
          * swapped compared to (0, 0, w, h) - presumably to get vertically
          * flipped texture coordinates; confirm against surface_get_blt_info(). */
5601     SetRect(&rect, 0, h, w, 0);
5602     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5603     GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0_ARB));
         /* Remember what was bound on unit 0 so it can be put back below. */
5604     glGetIntegerv(info.binding, &old_binding);
5605     glBindTexture(info.bind_target, texture);
5606     if (gl_info->supported[ARB_SHADOW])
5607     {
             /* Temporarily set the texture's compare mode to GL_NONE; the
              * previous mode is restored after drawing. */
5608         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5609         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5610     }
5611
5612     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5613             gl_info, info.tex_type, &surface->ds_current_size);
5614
         /* Quad spanning -1..1 in x and y, drawn as a triangle strip. */
5615     glBegin(GL_TRIANGLE_STRIP);
5616     glTexCoord3fv(info.coords[0]);
5617     glVertex2f(-1.0f, -1.0f);
5618     glTexCoord3fv(info.coords[1]);
5619     glVertex2f(1.0f, -1.0f);
5620     glTexCoord3fv(info.coords[2]);
5621     glVertex2f(-1.0f, 1.0f);
5622     glTexCoord3fv(info.coords[3]);
5623     glVertex2f(1.0f, 1.0f);
5624     glEnd();
5625
         /* compare_mode stays GL_NONE when ARB_shadow is unsupported, so this
          * is a no-op in that case. */
5626     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5627     glBindTexture(info.bind_target, old_binding);
5628
5629     glPopAttrib();
5630
5631     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5632 }
5633
5634 void surface_modify_ds_location(struct wined3d_surface *surface,
5635         DWORD location, UINT w, UINT h)
5636 {
5637     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5638
5639     if (location & ~SFLAG_DS_LOCATIONS)
5640         FIXME("Invalid location (%#x) specified.\n", location);
5641
5642     surface->ds_current_size.cx = w;
5643     surface->ds_current_size.cy = h;
5644     surface->flags &= ~SFLAG_DS_LOCATIONS;
5645     surface->flags |= location;
5646 }
5647
/* Bring the depth/stencil contents of "surface" up to date in "location"
 * (SFLAG_DS_OFFSCREEN or SFLAG_DS_ONSCREEN) by copying from the other one.
 *
 * Does nothing unless the offscreen rendering mode is ORM_FBO. On the paths
 * that reach the end of the function, "location" is added to the surface
 * flags and ds_current_size is set to the full surface size.
 *
 * surface:  depth/stencil surface to update.
 * context:  GL context to use; also supplies the swapchain whose front
 *           buffer and back buffer dimensions are used.
 * location: the depth/stencil location to load. */
5648 /* Context activation is done by the caller. */
5649 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5650 {
5651     struct wined3d_device *device = surface->resource.device;
5652     const struct wined3d_gl_info *gl_info = context->gl_info;
5653     GLsizei w, h;
5654
5655     TRACE("surface %p, new location %#x.\n", surface, location);
5656
5657     /* TODO: Make this work for modes other than FBO */
5658     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5659
         /* If "location" is not flagged yet, the area to copy is the size
          * recorded for the other location, and the recorded size is reset so
          * the "already up to date" test below cannot match. Otherwise the
          * full surface size is used. */
5660     if (!(surface->flags & location))
5661     {
5662         w = surface->ds_current_size.cx;
5663         h = surface->ds_current_size.cy;
5664         surface->ds_current_size.cx = 0;
5665         surface->ds_current_size.cy = 0;
5666     }
5667     else
5668     {
5669         w = surface->resource.width;
5670         h = surface->resource.height;
5671     }
5672
5673     if (surface->ds_current_size.cx == surface->resource.width
5674             && surface->ds_current_size.cy == surface->resource.height)
5675     {
5676         TRACE("Location (%#x) is already up to date.\n", location);
5677         return;
5678     }
5679
5680     if (surface->current_renderbuffer)
5681     {
5682         FIXME("Not supported with fixed up depth stencil.\n");
5683         return;
5684     }
5685
5686     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5687     {
5688         /* This mostly happens when a depth / stencil is used without being
5689          * cleared first. In principle we could upload from sysmem, or
5690          * explicitly clear before first usage. For the moment there don't
5691          * appear to be a lot of applications depending on this, so a FIXME
5692          * should do. */
5693         FIXME("No up to date depth stencil location.\n");
5694         surface->flags |= location;
5695         surface->ds_current_size.cx = surface->resource.width;
5696         surface->ds_current_size.cy = surface->resource.height;
5697         return;
5698     }
5699
5700     if (location == SFLAG_DS_OFFSCREEN)
5701     {
5702         GLint old_binding = 0;
5703         GLenum bind_target;
5704
5705         /* The render target is allowed to be smaller than the depth/stencil
5706          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5707          * than the offscreen surface. Don't overwrite the offscreen surface
5708          * with undefined data. */
5709         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5710         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5711
5712         TRACE("Copying onscreen depth buffer to depth texture.\n");
5713
5714         ENTER_GL();
5715
             /* The intermediate texture is created on first use and kept on
              * the device. */
5716         if (!device->depth_blt_texture)
5717         {
5718             glGenTextures(1, &device->depth_blt_texture);
5719         }
5720
5721         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5722          * directly on the FBO texture. That's because we need to flip. */
5723         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5724                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
             /* Save the current binding for the target that is about to be
              * used, so it can be restored after the copy. */
5725         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5726         {
5727             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5728             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5729         }
5730         else
5731         {
5732             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5733             bind_target = GL_TEXTURE_2D;
5734         }
5735         glBindTexture(bind_target, device->depth_blt_texture);
5736         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5737          * internal format, because the internal format might include stencil
5738          * data. In principle we should copy stencil data as well, but unless
5739          * the driver supports stencil export it's hard to do, and doesn't
5740          * seem to be needed in practice. If the hardware doesn't support
5741          * writing stencil data, the glCopyTexImage2D() call might trigger
5742          * software fallbacks. */
5743         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5744         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5745         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5746         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5747         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5748         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5749         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5750         glBindTexture(bind_target, old_binding);
5751
             /* Now target the surface itself as the depth attachment, with no
              * colour draw or read buffer. */
5752         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5753                 NULL, surface, SFLAG_INTEXTURE);
5754         context_set_draw_buffer(context, GL_NONE);
5755         glReadBuffer(GL_NONE);
5756
5757         /* Do the actual blit */
5758         surface_depth_blt(surface, gl_info, device->depth_blt_texture, 0, 0, w, h, bind_target);
5759         checkGLcall("depth_blt");
5760
5761         context_invalidate_state(context, STATE_FRAMEBUFFER);
5762
5763         LEAVE_GL();
5764
5765         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5766     }
5767     else if (location == SFLAG_DS_ONSCREEN)
5768     {
5769         TRACE("Copying depth texture to onscreen depth buffer.\n");
5770
5771         ENTER_GL();
5772
5773         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5774                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
             /* The viewport's y origin is pow2Height - h rather than 0. */
5775         surface_depth_blt(surface, gl_info, surface->texture_name,
5776                 0, surface->pow2Height - h, w, h, surface->texture_target);
5777         checkGLcall("depth_blt");
5778
5779         context_invalidate_state(context, STATE_FRAMEBUFFER);
5780
5781         LEAVE_GL();
5782
5783         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5784     }
5785     else
5786     {
5787         ERR("Invalid location (%#x) specified.\n", location);
5788     }
5789
         /* NOTE(review): this is also reached after the ERR() above, so an
          * invalid location value still gets OR'ed into the flags. */
5790     surface->flags |= location;
5791     surface->ds_current_size.cx = surface->resource.width;
5792     surface->ds_current_size.cy = surface->resource.height;
5793 }
5794
/* Update the set of locations (SFLAG_LOCATIONS bits in surface->flags) that
 * hold an up to date copy of the surface.
 *
 * persistent == TRUE:  "location" becomes the only valid location set; all
 *                      other location bits are cleared.
 * persistent == FALSE: the bits in "location" are removed from the valid set.
 *
 * In both modes the containing texture (if any) is marked dirty when a
 * texture location is affected as described below, and an ERR is printed if
 * the surface ends up with no valid location at all. */
5795 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5796 {
5797     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5798     struct wined3d_surface *overlay;
5799
5800     TRACE("surface %p, location %s, persistent %#x.\n",
5801             surface, debug_surflocation(location), persistent);
5802
5803     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
5804     {
5805         if (surface_is_offscreen(surface))
5806         {
5807             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
5808              * for offscreen targets. */
5809             if (location & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE))
5810                 location |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
5811         }
5812         else
5813         {
5814             TRACE("Surface %p is an onscreen surface.\n", surface);
5815         }
5816     }
5817
         /* When EXT_texture_sRGB_decode is supported, the RGB and sRGB texture
          * locations are always modified together. */
5818     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5819             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5820         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5821
5822     if (persistent)
5823     {
             /* A texture location that is currently valid is about to be
              * dropped: tell the containing texture. */
5824         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5825                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5826         {
5827             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5828             {
5829                 TRACE("Passing to container.\n");
5830                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5831             }
5832         }
5833         surface->flags &= ~SFLAG_LOCATIONS;
5834         surface->flags |= location;
5835
5836         /* Redraw emulated overlays, if any */
5837         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5838         {
5839             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5840             {
5841                 overlay->surface_ops->surface_draw_overlay(overlay);
5842             }
5843         }
5844     }
5845     else
5846     {
             /* A currently valid texture location is being invalidated: tell
              * the containing texture. */
5847         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5848         {
5849             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5850             {
5851                 TRACE("Passing to container\n");
5852                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5853             }
5854         }
5855         surface->flags &= ~location;
5856     }
5857
5858     if (!(surface->flags & SFLAG_LOCATIONS))
5859     {
5860         ERR("Surface %p does not have any up to date location.\n", surface);
5861     }
5862 }
5863
5864 static DWORD resource_access_from_location(DWORD location)
5865 {
5866     switch (location)
5867     {
5868         case SFLAG_INSYSMEM:
5869             return WINED3D_RESOURCE_ACCESS_CPU;
5870
5871         case SFLAG_INDRAWABLE:
5872         case SFLAG_INSRGBTEX:
5873         case SFLAG_INTEXTURE:
5874             return WINED3D_RESOURCE_ACCESS_GPU;
5875
5876         default:
5877             FIXME("Unhandled location %#x.\n", location);
5878             return 0;
5879     }
5880 }
5881
5882 static void surface_load_sysmem(struct wined3d_surface *surface,
5883         const struct wined3d_gl_info *gl_info, const RECT *rect)
5884 {
5885     surface_prepare_system_memory(surface);
5886
5887     /* Download the surface to system memory. */
5888     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5889     {
5890         struct wined3d_device *device = surface->resource.device;
5891         struct wined3d_context *context = NULL;
5892
5893         if (!device->isInDraw)
5894             context = context_acquire(device, NULL);
5895
5896         surface_bind_and_dirtify(surface, gl_info, !(surface->flags & SFLAG_INTEXTURE));
5897         surface_download_data(surface, gl_info);
5898
5899         if (context)
5900             context_release(context);
5901
5902         return;
5903     }
5904
5905     /* Note: It might be faster to download into a texture first. */
5906     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5907             wined3d_surface_get_pitch(surface));
5908 }
5909
/* Bring the drawable (framebuffer) copy of the surface up to date.
 *
 * If the RGB texture is valid it is blitted to the drawable. Otherwise the
 * system memory copy is written with flush_to_framebuffer_drawpixels(),
 * going through a temporary converted buffer when d3dfmt_get_conv() asks for
 * a conversion.
 *
 * Returns WINED3D_OK, or E_OUTOFMEMORY if the conversion buffer cannot be
 * allocated. */
5910 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5911         const struct wined3d_gl_info *gl_info, const RECT *rect)
5912 {
5913     struct wined3d_device *device = surface->resource.device;
5914     struct wined3d_format format;
5915     CONVERT_TYPES convert;
5916     UINT byte_count;
5917     BYTE *mem;
5918
         /* In RTL_READTEX mode the texture is loaded first; if that leaves
          * SFLAG_INTEXTURE set, the blit path below is taken. */
5919     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5920         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5921
5922     if (surface->flags & SFLAG_INTEXTURE)
5923     {
5924         RECT r;
5925
             /* Source and destination are the same surface and the same rect. */
5926         surface_get_rect(surface, rect, &r);
5927         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5928
5929         return WINED3D_OK;
5930     }
5931
5932     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5933     {
5934         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5935          * path through sysmem. */
5936         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5937     }
5938
5939     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5940
5941     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5942      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5943      * called. */
5944     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5945     {
5946         struct wined3d_context *context = NULL;
5947
5948         TRACE("Removing the pbo attached to surface %p.\n", surface);
5949
             /* A context is only acquired here when not inside a draw. */
5950         if (!device->isInDraw)
5951             context = context_acquire(device, NULL);
5952
5953         surface_remove_pbo(surface, gl_info);
5954
5955         if (context)
5956             context_release(context);
5957     }
5958
5959     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5960     {
5961         UINT height = surface->resource.height;
5962         UINT width = surface->resource.width;
5963         UINT src_pitch, dst_pitch;
5964
5965         byte_count = format.conv_byte_count;
5966         src_pitch = wined3d_surface_get_pitch(surface);
5967
5968         /* Stick to the alignment for the converted surface too, makes it
5969          * easier to load the surface. */
5970         dst_pitch = width * byte_count;
5971         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5972
5973         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5974         {
5975             ERR("Out of memory (%u).\n", dst_pitch * height);
5976             return E_OUTOFMEMORY;
5977         }
5978
5979         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5980                 src_pitch, width, height, dst_pitch, convert, surface);
5981
5982         surface->flags |= SFLAG_CONVERTED;
5983     }
5984     else
5985     {
             /* No conversion (or nothing to convert): draw straight from the
              * surface's own memory pointer. */
5986         surface->flags &= ~SFLAG_CONVERTED;
5987         mem = surface->resource.allocatedMemory;
5988         byte_count = format.byte_count;
5989     }
5990
5991     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5992
5993     /* Don't delete PBO memory. */
         /* Only the temporary conversion buffer is freed here. */
5994     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5995         HeapFree(GetProcessHeap(), 0, mem);
5996
5997     return WINED3D_OK;
5998 }
5999
/* Bring the RGB texture (srgb == FALSE) or the sRGB texture (srgb == TRUE)
 * of the surface up to date. Three sources are tried in order:
 *
 *  1. Without ORM_FBO, an offscreen surface whose drawable is valid is read
 *     from the framebuffer directly into the texture.
 *  2. If one of the two textures is valid, the format has both FBO
 *     attachable flags and fbo_blit_supported() agrees, the other texture is
 *     produced with an FBO blit.
 *  3. Otherwise the data is uploaded from system memory, loading system
 *     memory first if needed and converting through a temporary buffer when
 *     the format or colour keying requires it.
 *
 * Returns WINED3D_OK, or E_OUTOFMEMORY if a conversion buffer cannot be
 * allocated. */
6000 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6001         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6002 {
6003     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
6004     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6005     struct wined3d_device *device = surface->resource.device;
6006     struct wined3d_context *context = NULL;
6007     UINT width, src_pitch, dst_pitch;
6008     struct wined3d_bo_address data;
6009     struct wined3d_format format;
6010     POINT dst_point = {0, 0};
6011     CONVERT_TYPES convert;
6012     BYTE *mem;
6013
6014     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6015             && surface_is_offscreen(surface)
6016             && (surface->flags & SFLAG_INDRAWABLE))
6017     {
6018         read_from_framebuffer_texture(surface, srgb);
6019
6020         return WINED3D_OK;
6021     }
6022
6023     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6024             && (surface->resource.format->flags & attach_flags) == attach_flags
6025             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6026                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6027                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6028     {
             /* Blit the whole surface from the texture that is not being
              * loaded into the one that is. */
6029         if (srgb)
6030             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
6031                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6032         else
6033             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
6034                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6035
6036         return WINED3D_OK;
6037     }
6038
6039     /* Upload from system memory */
6040
6041     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6042             TRUE /* We will use textures */, &format, &convert);
6043
         /* If the only valid copy of the two checked locations is the other
          * colour space's texture, download it to system memory first. */
6044     if (srgb)
6045     {
6046         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6047         {
6048             /* Performance warning... */
6049             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6050             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6051         }
6052     }
6053     else
6054     {
6055         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6056         {
6057             /* Performance warning... */
6058             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6059             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6060         }
6061     }
6062
6063     if (!(surface->flags & SFLAG_INSYSMEM))
6064     {
6065         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6066         /* Lets hope we get it from somewhere... */
6067         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6068     }
6069
         /* A context is only acquired here when not inside a draw; every
          * return path below releases it again if it was acquired. */
6070     if (!device->isInDraw)
6071         context = context_acquire(device, NULL);
6072
6073     surface_prepare_texture(surface, gl_info, srgb);
6074     surface_bind_and_dirtify(surface, gl_info, srgb);
6075
         /* Record whether, and with which key, the GL texture is loaded with
          * a source colour key. */
6076     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6077     {
6078         surface->flags |= SFLAG_GLCKEY;
6079         surface->glCKey = surface->SrcBltCKey;
6080     }
6081     else surface->flags &= ~SFLAG_GLCKEY;
6082
6083     width = surface->resource.width;
6084     src_pitch = wined3d_surface_get_pitch(surface);
6085
6086     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6087      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6088      * called. */
6089     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6090     {
6091         TRACE("Removing the pbo attached to surface %p.\n", surface);
6092         surface_remove_pbo(surface, gl_info);
6093     }
6094
6095     if (format.convert)
6096     {
6097         /* This code is entered for texture formats which need a fixup. */
6098         UINT height = surface->resource.height;
6099
6100         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6101         dst_pitch = width * format.conv_byte_count;
6102         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6103
6104         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6105         {
6106             ERR("Out of memory (%u).\n", dst_pitch * height);
6107             if (context)
6108                 context_release(context);
6109             return E_OUTOFMEMORY;
6110         }
6111         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6112     }
6113     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6114     {
6115         /* This code is only entered for color keying fixups */
6116         UINT height = surface->resource.height;
6117
6118         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6119         dst_pitch = width * format.conv_byte_count;
6120         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6121
6122         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6123         {
6124             ERR("Out of memory (%u).\n", dst_pitch * height);
6125             if (context)
6126                 context_release(context);
6127             return E_OUTOFMEMORY;
6128         }
6129         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6130                 width, height, dst_pitch, convert, surface);
6131     }
6132     else
6133     {
             /* No conversion: upload straight from the surface's own memory
              * pointer. */
6134         mem = surface->resource.allocatedMemory;
6135     }
6136
         /* The upload source is the PBO if the surface (still) has one,
          * otherwise plain memory at "mem". */
6137     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6138     data.addr = mem;
6139     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6140
6141     if (context)
6142         context_release(context);
6143
6144     /* Don't delete PBO memory. */
         /* Only a temporary conversion buffer is freed here. */
6145     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6146         HeapFree(GetProcessHeap(), 0, mem);
6147
6148     return WINED3D_OK;
6149 }
6150
/* Make sure "location" holds an up to date copy of the surface, loading it
 * from another valid location if necessary.
 *
 * surface:  surface to load.
 * location: a single SFLAG_IN* location flag.
 * rect:     optional sub-rectangle handed to the loaders; when it is
 *           non-NULL the location is NOT flagged as valid afterwards.
 *
 * Returns WINED3D_OK on success (including when nothing had to be done),
 * WINED3DERR_INVALIDCALL for unsupported depth/stencil locations,
 * WINED3DERR_DEVICELOST (and sets SFLAG_LOST) if the surface has no valid
 * location to load from, or the failure code of the drawable / texture
 * loader. */
6151 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6152 {
6153     struct wined3d_device *device = surface->resource.device;
6154     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6155     BOOL in_fbo = FALSE;
6156     HRESULT hr;
6157
6158     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6159
         /* Depth/stencil surfaces use the separate SFLAG_DS_* bookkeeping;
          * only loading the texture is implemented for them. */
6160     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6161     {
6162         if (location == SFLAG_INTEXTURE)
6163         {
6164             struct wined3d_context *context = context_acquire(device, NULL);
6165             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6166             context_release(context);
6167             return WINED3D_OK;
6168         }
6169         else
6170         {
6171             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6172             return WINED3DERR_INVALIDCALL;
6173         }
6174     }
6175
6176     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6177     {
6178         if (surface_is_offscreen(surface))
6179         {
6180             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
6181              * for offscreen targets. Prefer SFLAG_INTEXTURE. */
6182             if (location == SFLAG_INDRAWABLE)
6183                 location = SFLAG_INTEXTURE;
6184             in_fbo = TRUE;
6185         }
6186         else
6187         {
6188             TRACE("Surface %p is an onscreen surface.\n", surface);
6189         }
6190     }
6191
         /* With EXT_texture_sRGB_decode a request for the sRGB texture is
          * served by loading the RGB texture. */
6192     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6193         location = SFLAG_INTEXTURE;
6194
6195     if (surface->flags & location)
6196     {
6197         TRACE("Location already up to date.\n");
6198         return WINED3D_OK;
6199     }
6200
         /* Diagnostic only: warn if the resource's access flags do not cover
          * what this location needs. */
6201     if (WARN_ON(d3d_surface))
6202     {
6203         DWORD required_access = resource_access_from_location(location);
6204         if ((surface->resource.access_flags & required_access) != required_access)
6205             WARN("Operation requires %#x access, but surface only has %#x.\n",
6206                     required_access, surface->resource.access_flags);
6207     }
6208
6209     if (!(surface->flags & SFLAG_LOCATIONS))
6210     {
6211         ERR("Surface %p does not have any up to date location.\n", surface);
6212         surface->flags |= SFLAG_LOST;
6213         return WINED3DERR_DEVICELOST;
6214     }
6215
6216     switch (location)
6217     {
6218         case SFLAG_INSYSMEM:
6219             surface_load_sysmem(surface, gl_info, rect);
6220             break;
6221
6222         case SFLAG_INDRAWABLE:
6223             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6224                 return hr;
6225             break;
6226
6227         case SFLAG_INTEXTURE:
6228         case SFLAG_INSRGBTEX:
6229             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6230                 return hr;
6231             break;
6232
6233         default:
                 /* NOTE(review): execution continues after this ERR(), so for
                  * a NULL rect the unknown location is still flagged as valid
                  * below and WINED3D_OK is returned. */
6234             ERR("Don't know how to handle location %#x.\n", location);
6235             break;
6236     }
6237
         /* Only a full-surface load makes the location valid. Once a
          * non-sysmem location is valid, surface_evict_sysmem() is called if
          * system memory is valid too. */
6238     if (!rect)
6239     {
6240         surface->flags |= location;
6241
6242         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6243             surface_evict_sysmem(surface);
6244     }
6245
6246     if (in_fbo && (surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)))
6247     {
6248         /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
6249         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
6250     }
6251
         /* With EXT_texture_sRGB_decode the RGB and sRGB texture locations
          * are always valid together. */
6252     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6253             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6254     {
6255         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6256     }
6257
6258     return WINED3D_OK;
6259 }
6260
6261 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6262 {
6263     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6264
6265     /* Not on a swapchain - must be offscreen */
6266     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6267
6268     /* The front buffer is always onscreen */
6269     if (surface == swapchain->front_buffer) return FALSE;
6270
6271     /* If the swapchain is rendered to an FBO, the backbuffer is
6272      * offscreen, otherwise onscreen */
6273     return swapchain->render_to_fbo;
6274 }
6275
6276 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Context activation is done by the caller.
 *
 * Blitter "free" hook: this implementation has nothing to release. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6279
6280 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6281 /* Context activation is done by the caller. */
6282 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6283 {
6284     BYTE table[256][4];
6285     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6286
6287     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6288
6289     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6290     ENTER_GL();
6291     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6292     LEAVE_GL();
6293 }
6294
6295 /* Context activation is done by the caller. */
6296 static HRESULT ffp_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6297 {
6298     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6299
6300     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6301      * else the surface is converted in software at upload time in LoadLocation.
6302      */
6303     if(fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6304         ffp_blit_p8_upload_palette(surface, gl_info);
6305
6306     ENTER_GL();
6307     glEnable(surface->texture_target);
6308     checkGLcall("glEnable(surface->texture_target)");
6309     LEAVE_GL();
6310     return WINED3D_OK;
6311 }
6312
6313 /* Context activation is done by the caller. */
6314 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6315 {
6316     ENTER_GL();
6317     glDisable(GL_TEXTURE_2D);
6318     checkGLcall("glDisable(GL_TEXTURE_2D)");
6319     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6320     {
6321         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6322         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6323     }
6324     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6325     {
6326         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6327         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6328     }
6329     LEAVE_GL();
6330 }
6331
6332 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6333         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6334         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6335 {
6336     enum complex_fixup src_fixup;
6337
6338     switch (blit_op)
6339     {
6340         case WINED3D_BLIT_OP_COLOR_BLIT:
6341             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6342                 return FALSE;
6343
6344             src_fixup = get_complex_fixup(src_format->color_fixup);
6345             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6346             {
6347                 TRACE("Checking support for fixup:\n");
6348                 dump_color_fixup_desc(src_format->color_fixup);
6349             }
6350
6351             if (!is_identity_fixup(dst_format->color_fixup))
6352             {
6353                 TRACE("Destination fixups are not supported\n");
6354                 return FALSE;
6355             }
6356
6357             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6358             {
6359                 TRACE("P8 fixup supported\n");
6360                 return TRUE;
6361             }
6362
6363             /* We only support identity conversions. */
6364             if (is_identity_fixup(src_format->color_fixup))
6365             {
6366                 TRACE("[OK]\n");
6367                 return TRUE;
6368             }
6369
6370             TRACE("[FAILED]\n");
6371             return FALSE;
6372
6373         case WINED3D_BLIT_OP_COLOR_FILL:
6374             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6375                 return FALSE;
6376
6377             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6378             {
6379                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6380                     return FALSE;
6381             }
6382             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6383             {
6384                 TRACE("Color fill not supported\n");
6385                 return FALSE;
6386             }
6387
6388             /* FIXME: We should reject color fills on formats with fixups,
6389              * but this would break P8 color fills for example. */
6390
6391             return TRUE;
6392
6393         case WINED3D_BLIT_OP_DEPTH_FILL:
6394             return TRUE;
6395
6396         default:
6397             TRACE("Unsupported blit_op=%d\n", blit_op);
6398             return FALSE;
6399     }
6400 }
6401
6402 /* Do not call while under the GL lock. */
6403 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6404         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6405 {
6406     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6407     struct wined3d_fb_state fb = {&dst_surface, NULL};
6408
6409     return device_clear_render_targets(device, 1, &fb,
6410             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6411 }
6412
6413 /* Do not call while under the GL lock. */
6414 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6415         struct wined3d_surface *surface, const RECT *rect, float depth)
6416 {
6417     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6418     struct wined3d_fb_state fb = {NULL, surface};
6419
6420     return device_clear_render_targets(device, 0, &fb,
6421             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6422 }
6423
6424 const struct blit_shader ffp_blit =  {
6425     ffp_blit_alloc,
6426     ffp_blit_free,
6427     ffp_blit_set,
6428     ffp_blit_unset,
6429     ffp_blit_supported,
6430     ffp_blit_color_fill,
6431     ffp_blit_depth_fill,
6432 };
6433
6434 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6435 {
6436     return WINED3D_OK;
6437 }
6438
/* Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Nothing was allocated by cpu_blit_alloc(), so nothing to release. */
}
6443
6444 /* Context activation is done by the caller. */
6445 static HRESULT cpu_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6446 {
6447     return WINED3D_OK;
6448 }
6449
/* Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* cpu_blit_set() changed nothing, so there is nothing to restore. */
}
6454
6455 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6456         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6457         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6458 {
6459     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6460     {
6461         return TRUE;
6462     }
6463
6464     return FALSE;
6465 }
6466
6467 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6468         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6469         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6470 {
6471     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6472     const struct wined3d_format *src_format, *dst_format;
6473     struct wined3d_surface *orig_src = src_surface;
6474     WINED3DLOCKED_RECT dlock, slock;
6475     HRESULT hr = WINED3D_OK;
6476     const BYTE *sbuf;
6477     RECT xdst,xsrc;
6478     BYTE *dbuf;
6479     int x, y;
6480
6481     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6482             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6483             flags, fx, debug_d3dtexturefiltertype(filter));
6484
6485     xsrc = *src_rect;
6486
6487     if (!src_surface)
6488     {
6489         RECT full_rect;
6490
6491         full_rect.left = 0;
6492         full_rect.top = 0;
6493         full_rect.right = dst_surface->resource.width;
6494         full_rect.bottom = dst_surface->resource.height;
6495         IntersectRect(&xdst, &full_rect, dst_rect);
6496     }
6497     else
6498     {
6499         BOOL clip_horiz, clip_vert;
6500
6501         xdst = *dst_rect;
6502         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6503         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6504
6505         if (clip_vert || clip_horiz)
6506         {
6507             /* Now check if this is a special case or not... */
6508             if ((flags & WINEDDBLT_DDFX)
6509                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6510                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6511             {
6512                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6513                 return WINED3D_OK;
6514             }
6515
6516             if (clip_horiz)
6517             {
6518                 if (xdst.left < 0)
6519                 {
6520                     xsrc.left -= xdst.left;
6521                     xdst.left = 0;
6522                 }
6523                 if (xdst.right > dst_surface->resource.width)
6524                 {
6525                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6526                     xdst.right = (int)dst_surface->resource.width;
6527                 }
6528             }
6529
6530             if (clip_vert)
6531             {
6532                 if (xdst.top < 0)
6533                 {
6534                     xsrc.top -= xdst.top;
6535                     xdst.top = 0;
6536                 }
6537                 if (xdst.bottom > dst_surface->resource.height)
6538                 {
6539                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6540                     xdst.bottom = (int)dst_surface->resource.height;
6541                 }
6542             }
6543
6544             /* And check if after clipping something is still to be done... */
6545             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6546                     || (xdst.left >= (int)dst_surface->resource.width)
6547                     || (xdst.top >= (int)dst_surface->resource.height)
6548                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6549                     || (xsrc.left >= (int)src_surface->resource.width)
6550                     || (xsrc.top >= (int)src_surface->resource.height))
6551             {
6552                 TRACE("Nothing to be done after clipping.\n");
6553                 return WINED3D_OK;
6554             }
6555         }
6556     }
6557
6558     if (src_surface == dst_surface)
6559     {
6560         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6561         slock = dlock;
6562         src_format = dst_surface->resource.format;
6563         dst_format = src_format;
6564     }
6565     else
6566     {
6567         dst_format = dst_surface->resource.format;
6568         if (src_surface)
6569         {
6570             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6571             {
6572                 src_surface = surface_convert_format(src_surface, dst_format->id);
6573                 if (!src_surface)
6574                 {
6575                     /* The conv function writes a FIXME */
6576                     WARN("Cannot convert source surface format to dest format.\n");
6577                     goto release;
6578                 }
6579             }
6580             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6581             src_format = src_surface->resource.format;
6582         }
6583         else
6584         {
6585             src_format = dst_format;
6586         }
6587         if (dst_rect)
6588             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6589         else
6590             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6591     }
6592
6593     bpp = dst_surface->resource.format->byte_count;
6594     srcheight = xsrc.bottom - xsrc.top;
6595     srcwidth = xsrc.right - xsrc.left;
6596     dstheight = xdst.bottom - xdst.top;
6597     dstwidth = xdst.right - xdst.left;
6598     width = (xdst.right - xdst.left) * bpp;
6599
6600     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6601     {
6602         UINT row_block_count;
6603
6604         if (flags || src_surface == dst_surface)
6605         {
6606             FIXME("Only plain blits supported on compressed surfaces.\n");
6607             hr = E_NOTIMPL;
6608             goto release;
6609         }
6610
6611         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6612
6613         if (srcheight != dstheight || srcwidth != dstwidth)
6614         {
6615             WARN("Stretching not supported on compressed surfaces.\n");
6616             hr = WINED3DERR_INVALIDCALL;
6617             goto release;
6618         }
6619
6620         dbuf = dlock.pBits;
6621         sbuf = slock.pBits;
6622
6623         row_block_count = (dstwidth + dst_format->block_width - 1) / dst_format->block_width;
6624         for (y = 0; y < dstheight; y += dst_format->block_height)
6625         {
6626             memcpy(dbuf, sbuf, row_block_count * dst_format->block_byte_count);
6627             dbuf += dlock.Pitch;
6628             sbuf += slock.Pitch;
6629         }
6630
6631         goto release;
6632     }
6633
6634     if (dst_rect && src_surface != dst_surface)
6635         dbuf = dlock.pBits;
6636     else
6637         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6638
6639     /* First, all the 'source-less' blits */
6640     if (flags & WINEDDBLT_COLORFILL)
6641     {
6642         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6643         flags &= ~WINEDDBLT_COLORFILL;
6644     }
6645
6646     if (flags & WINEDDBLT_DEPTHFILL)
6647     {
6648         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6649     }
6650     if (flags & WINEDDBLT_ROP)
6651     {
6652         /* Catch some degenerate cases here. */
6653         switch (fx->dwROP)
6654         {
6655             case BLACKNESS:
6656                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6657                 break;
6658             case 0xAA0029: /* No-op */
6659                 break;
6660             case WHITENESS:
6661                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6662                 break;
6663             case SRCCOPY: /* Well, we do that below? */
6664                 break;
6665             default:
6666                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6667                 goto error;
6668         }
6669         flags &= ~WINEDDBLT_ROP;
6670     }
6671     if (flags & WINEDDBLT_DDROPS)
6672     {
6673         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6674     }
6675     /* Now the 'with source' blits. */
6676     if (src_surface)
6677     {
6678         const BYTE *sbase;
6679         int sx, xinc, sy, yinc;
6680
6681         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6682             goto release;
6683
6684         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6685                 && (srcwidth != dstwidth || srcheight != dstheight))
6686         {
6687             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6688             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6689         }
6690
6691         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6692         xinc = (srcwidth << 16) / dstwidth;
6693         yinc = (srcheight << 16) / dstheight;
6694
6695         if (!flags)
6696         {
6697             /* No effects, we can cheat here. */
6698             if (dstwidth == srcwidth)
6699             {
6700                 if (dstheight == srcheight)
6701                 {
6702                     /* No stretching in either direction. This needs to be as
6703                      * fast as possible. */
6704                     sbuf = sbase;
6705
6706                     /* Check for overlapping surfaces. */
6707                     if (src_surface != dst_surface || xdst.top < xsrc.top
6708                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6709                     {
6710                         /* No overlap, or dst above src, so copy from top downwards. */
6711                         for (y = 0; y < dstheight; ++y)
6712                         {
6713                             memcpy(dbuf, sbuf, width);
6714                             sbuf += slock.Pitch;
6715                             dbuf += dlock.Pitch;
6716                         }
6717                     }
6718                     else if (xdst.top > xsrc.top)
6719                     {
6720                         /* Copy from bottom upwards. */
6721                         sbuf += (slock.Pitch*dstheight);
6722                         dbuf += (dlock.Pitch*dstheight);
6723                         for (y = 0; y < dstheight; ++y)
6724                         {
6725                             sbuf -= slock.Pitch;
6726                             dbuf -= dlock.Pitch;
6727                             memcpy(dbuf, sbuf, width);
6728                         }
6729                     }
6730                     else
6731                     {
6732                         /* Src and dst overlapping on the same line, use memmove. */
6733                         for (y = 0; y < dstheight; ++y)
6734                         {
6735                             memmove(dbuf, sbuf, width);
6736                             sbuf += slock.Pitch;
6737                             dbuf += dlock.Pitch;
6738                         }
6739                     }
6740                 }
6741                 else
6742                 {
6743                     /* Stretching in y direction only. */
6744                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6745                     {
6746                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6747                         memcpy(dbuf, sbuf, width);
6748                         dbuf += dlock.Pitch;
6749                     }
6750                 }
6751             }
6752             else
6753             {
6754                 /* Stretching in X direction. */
6755                 int last_sy = -1;
6756                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6757                 {
6758                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6759
6760                     if ((sy >> 16) == (last_sy >> 16))
6761                     {
6762                         /* This source row is the same as last source row -
6763                          * Copy the already stretched row. */
6764                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6765                     }
6766                     else
6767                     {
6768 #define STRETCH_ROW(type) \
6769 do { \
6770     const type *s = (const type *)sbuf; \
6771     type *d = (type *)dbuf; \
6772     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6773         d[x] = s[sx >> 16]; \
6774 } while(0)
6775
6776                         switch(bpp)
6777                         {
6778                             case 1:
6779                                 STRETCH_ROW(BYTE);
6780                                 break;
6781                             case 2:
6782                                 STRETCH_ROW(WORD);
6783                                 break;
6784                             case 4:
6785                                 STRETCH_ROW(DWORD);
6786                                 break;
6787                             case 3:
6788                             {
6789                                 const BYTE *s;
6790                                 BYTE *d = dbuf;
6791                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6792                                 {
6793                                     DWORD pixel;
6794
6795                                     s = sbuf + 3 * (sx >> 16);
6796                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6797                                     d[0] = (pixel      ) & 0xff;
6798                                     d[1] = (pixel >>  8) & 0xff;
6799                                     d[2] = (pixel >> 16) & 0xff;
6800                                     d += 3;
6801                                 }
6802                                 break;
6803                             }
6804                             default:
6805                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6806                                 hr = WINED3DERR_NOTAVAILABLE;
6807                                 goto error;
6808                         }
6809 #undef STRETCH_ROW
6810                     }
6811                     dbuf += dlock.Pitch;
6812                     last_sy = sy;
6813                 }
6814             }
6815         }
6816         else
6817         {
6818             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6819             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6820             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6821             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6822             {
6823                 /* The color keying flags are checked for correctness in ddraw */
6824                 if (flags & WINEDDBLT_KEYSRC)
6825                 {
6826                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6827                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6828                 }
6829                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6830                 {
6831                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6832                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6833                 }
6834
6835                 if (flags & WINEDDBLT_KEYDEST)
6836                 {
6837                     /* Destination color keys are taken from the source surface! */
6838                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6839                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6840                 }
6841                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6842                 {
6843                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6844                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6845                 }
6846
6847                 if (bpp == 1)
6848                 {
6849                     keymask = 0xff;
6850                 }
6851                 else
6852                 {
6853                     keymask = src_format->red_mask
6854                             | src_format->green_mask
6855                             | src_format->blue_mask;
6856                 }
6857                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6858             }
6859
6860             if (flags & WINEDDBLT_DDFX)
6861             {
6862                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6863                 LONG tmpxy;
6864                 dTopLeft     = dbuf;
6865                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6866                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6867                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6868
6869                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6870                 {
6871                     /* I don't think we need to do anything about this flag */
6872                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6873                 }
6874                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6875                 {
6876                     tmp          = dTopRight;
6877                     dTopRight    = dTopLeft;
6878                     dTopLeft     = tmp;
6879                     tmp          = dBottomRight;
6880                     dBottomRight = dBottomLeft;
6881                     dBottomLeft  = tmp;
6882                     dstxinc = dstxinc * -1;
6883                 }
6884                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6885                 {
6886                     tmp          = dTopLeft;
6887                     dTopLeft     = dBottomLeft;
6888                     dBottomLeft  = tmp;
6889                     tmp          = dTopRight;
6890                     dTopRight    = dBottomRight;
6891                     dBottomRight = tmp;
6892                     dstyinc = dstyinc * -1;
6893                 }
6894                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6895                 {
6896                     /* I don't think we need to do anything about this flag */
6897                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6898                 }
6899                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6900                 {
6901                     tmp          = dBottomRight;
6902                     dBottomRight = dTopLeft;
6903                     dTopLeft     = tmp;
6904                     tmp          = dBottomLeft;
6905                     dBottomLeft  = dTopRight;
6906                     dTopRight    = tmp;
6907                     dstxinc = dstxinc * -1;
6908                     dstyinc = dstyinc * -1;
6909                 }
6910                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6911                 {
6912                     tmp          = dTopLeft;
6913                     dTopLeft     = dBottomLeft;
6914                     dBottomLeft  = dBottomRight;
6915                     dBottomRight = dTopRight;
6916                     dTopRight    = tmp;
6917                     tmpxy   = dstxinc;
6918                     dstxinc = dstyinc;
6919                     dstyinc = tmpxy;
6920                     dstxinc = dstxinc * -1;
6921                 }
6922                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6923                 {
6924                     tmp          = dTopLeft;
6925                     dTopLeft     = dTopRight;
6926                     dTopRight    = dBottomRight;
6927                     dBottomRight = dBottomLeft;
6928                     dBottomLeft  = tmp;
6929                     tmpxy   = dstxinc;
6930                     dstxinc = dstyinc;
6931                     dstyinc = tmpxy;
6932                     dstyinc = dstyinc * -1;
6933                 }
6934                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6935                 {
6936                     /* I don't think we need to do anything about this flag */
6937                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6938                 }
6939                 dbuf = dTopLeft;
6940                 flags &= ~(WINEDDBLT_DDFX);
6941             }
6942
6943 #define COPY_COLORKEY_FX(type) \
6944 do { \
6945     const type *s; \
6946     type *d = (type *)dbuf, *dx, tmp; \
6947     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6948     { \
6949         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6950         dx = d; \
6951         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6952         { \
6953             tmp = s[sx >> 16]; \
6954             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6955                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6956             { \
6957                 dx[0] = tmp; \
6958             } \
6959             dx = (type *)(((BYTE *)dx) + dstxinc); \
6960         } \
6961         d = (type *)(((BYTE *)d) + dstyinc); \
6962     } \
6963 } while(0)
6964
6965             switch (bpp)
6966             {
6967                 case 1:
6968                     COPY_COLORKEY_FX(BYTE);
6969                     break;
6970                 case 2:
6971                     COPY_COLORKEY_FX(WORD);
6972                     break;
6973                 case 4:
6974                     COPY_COLORKEY_FX(DWORD);
6975                     break;
6976                 case 3:
6977                 {
6978                     const BYTE *s;
6979                     BYTE *d = dbuf, *dx;
6980                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6981                     {
6982                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6983                         dx = d;
6984                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
6985                         {
6986                             DWORD pixel, dpixel = 0;
6987                             s = sbuf + 3 * (sx>>16);
6988                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6989                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
6990                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
6991                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
6992                             {
6993                                 dx[0] = (pixel      ) & 0xff;
6994                                 dx[1] = (pixel >>  8) & 0xff;
6995                                 dx[2] = (pixel >> 16) & 0xff;
6996                             }
6997                             dx += dstxinc;
6998                         }
6999                         d += dstyinc;
7000                     }
7001                     break;
7002                 }
7003                 default:
7004                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7005                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7006                     hr = WINED3DERR_NOTAVAILABLE;
7007                     goto error;
7008 #undef COPY_COLORKEY_FX
7009             }
7010         }
7011     }
7012
7013 error:
7014     if (flags && FIXME_ON(d3d_surface))
7015     {
7016         FIXME("\tUnsupported flags: %#x.\n", flags);
7017     }
7018
7019 release:
7020     wined3d_surface_unmap(dst_surface);
7021     if (src_surface && src_surface != dst_surface)
7022         wined3d_surface_unmap(src_surface);
7023     /* Release the converted surface, if any. */
7024     if (src_surface && src_surface != orig_src)
7025         wined3d_surface_decref(src_surface);
7026
7027     return hr;
7028 }
7029
7030 /* Do not call while under the GL lock. */
7031 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7032         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7033 {
7034     static const RECT src_rect;
7035     WINEDDBLTFX BltFx;
7036
7037     memset(&BltFx, 0, sizeof(BltFx));
7038     BltFx.dwSize = sizeof(BltFx);
7039     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7040     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7041             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7042 }
7043
7044 /* Do not call while under the GL lock. */
7045 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7046         struct wined3d_surface *surface, const RECT *rect, float depth)
7047 {
7048     FIXME("Depth filling not implemented by cpu_blit.\n");
7049     return WINED3DERR_INVALIDCALL;
7050 }
7051
7052 const struct blit_shader cpu_blit =  {
7053     cpu_blit_alloc,
7054     cpu_blit_free,
7055     cpu_blit_set,
7056     cpu_blit_unset,
7057     cpu_blit_supported,
7058     cpu_blit_color_fill,
7059     cpu_blit_depth_fill,
7060 };
7061
7062 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7063         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7064         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7065         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7066 {
7067     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7068     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7069     unsigned int resource_size;
7070     HRESULT hr;
7071
7072     if (multisample_quality > 0)
7073     {
7074         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7075         multisample_quality = 0;
7076     }
7077
7078     /* Quick lockable sanity check.
7079      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7080      * this function is too deep to need to care about things like this.
7081      * Levels need to be checked too, since they all affect what can be done. */
7082     switch (pool)
7083     {
7084         case WINED3DPOOL_SCRATCH:
7085             if (!lockable)
7086             {
7087                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7088                         "which are mutually exclusive, setting lockable to TRUE.\n");
7089                 lockable = TRUE;
7090             }
7091             break;
7092
7093         case WINED3DPOOL_SYSTEMMEM:
7094             if (!lockable)
7095                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7096             break;
7097
7098         case WINED3DPOOL_MANAGED:
7099             if (usage & WINED3DUSAGE_DYNAMIC)
7100                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7101             break;
7102
7103         case WINED3DPOOL_DEFAULT:
7104             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7105                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7106             break;
7107
7108         default:
7109             FIXME("Unknown pool %#x.\n", pool);
7110             break;
7111     };
7112
7113     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7114         FIXME("Trying to create a render target that isn't in the default pool.\n");
7115
7116     /* FIXME: Check that the format is supported by the device. */
7117
7118     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7119     if (!resource_size)
7120         return WINED3DERR_INVALIDCALL;
7121
7122     surface->surface_type = surface_type;
7123
7124     switch (surface_type)
7125     {
7126         case SURFACE_OPENGL:
7127             surface->surface_ops = &surface_ops;
7128             break;
7129
7130         case SURFACE_GDI:
7131             surface->surface_ops = &gdi_surface_ops;
7132             break;
7133
7134         default:
7135             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7136             return WINED3DERR_INVALIDCALL;
7137     }
7138
7139     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7140             multisample_type, multisample_quality, usage, pool, width, height, 1,
7141             resource_size, parent, parent_ops, &surface_resource_ops);
7142     if (FAILED(hr))
7143     {
7144         WARN("Failed to initialize resource, returning %#x.\n", hr);
7145         return hr;
7146     }
7147
7148     /* "Standalone" surface. */
7149     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7150
7151     surface->texture_level = level;
7152     list_init(&surface->overlays);
7153
7154     /* Flags */
7155     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7156     if (discard)
7157         surface->flags |= SFLAG_DISCARD;
7158     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7159         surface->flags |= SFLAG_LOCKABLE;
7160     /* I'm not sure if this qualifies as a hack or as an optimization. It
7161      * seems reasonable to assume that lockable render targets will get
7162      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7163      * creation. However, the other reason we want to do this is that several
7164      * ddraw applications access surface memory while the surface isn't
7165      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7166      * future locks prevents these from crashing. */
7167     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7168         surface->flags |= SFLAG_DYNLOCK;
7169
7170     /* Mark the texture as dirty so that it gets loaded first time around. */
7171     surface_add_dirty_rect(surface, NULL);
7172     list_init(&surface->renderbuffers);
7173
7174     TRACE("surface %p, memory %p, size %u\n",
7175             surface, surface->resource.allocatedMemory, surface->resource.size);
7176
7177     /* Call the private setup routine */
7178     hr = surface->surface_ops->surface_private_setup(surface);
7179     if (FAILED(hr))
7180     {
7181         ERR("Private setup failed, returning %#x\n", hr);
7182         surface->surface_ops->surface_cleanup(surface);
7183         return hr;
7184     }
7185
7186     return hr;
7187 }
7188
7189 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7190         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7191         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7192         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7193 {
7194     struct wined3d_surface *object;
7195     HRESULT hr;
7196
7197     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7198             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7199     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7200             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7201     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7202
7203     if (surface_type == SURFACE_OPENGL && !device->adapter)
7204     {
7205         ERR("OpenGL surfaces are not available without OpenGL.\n");
7206         return WINED3DERR_NOTAVAILABLE;
7207     }
7208
7209     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7210     if (!object)
7211     {
7212         ERR("Failed to allocate surface memory.\n");
7213         return WINED3DERR_OUTOFVIDEOMEMORY;
7214     }
7215
7216     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7217             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7218     if (FAILED(hr))
7219     {
7220         WARN("Failed to initialize surface, returning %#x.\n", hr);
7221         HeapFree(GetProcessHeap(), 0, object);
7222         return hr;
7223     }
7224
7225     TRACE("Created surface %p.\n", object);
7226     *surface = object;
7227
7228     return hr;
7229 }