wined3d: Remove d3d8/9 palette support.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     TRACE("surface %p.\n", surface);
46
47     if (surface->texture_name || (surface->flags & SFLAG_PBO)
48              || surface->rb_multisample || surface->rb_resolved
49              || !list_empty(&surface->renderbuffers))
50     {
51         struct wined3d_renderbuffer_entry *entry, *entry2;
52         const struct wined3d_gl_info *gl_info;
53         struct wined3d_context *context;
54
55         context = context_acquire(surface->resource.device, NULL);
56         gl_info = context->gl_info;
57
58         ENTER_GL();
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         if (surface->rb_multisample)
73         {
74             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
76         }
77
78         if (surface->rb_resolved)
79         {
80             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
81             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
82         }
83
84         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
85         {
86             TRACE("Deleting renderbuffer %u.\n", entry->id);
87             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
88             HeapFree(GetProcessHeap(), 0, entry);
89         }
90
91         LEAVE_GL();
92
93         context_release(context);
94     }
95
96     if (surface->flags & SFLAG_DIBSECTION)
97     {
98         /* Release the DC. */
99         SelectObject(surface->hDC, surface->dib.holdbitmap);
100         DeleteDC(surface->hDC);
101         /* Release the DIB section. */
102         DeleteObject(surface->dib.DIBsection);
103         surface->dib.bitmap_data = NULL;
104         surface->resource.allocatedMemory = NULL;
105     }
106
107     if (surface->flags & SFLAG_USERPTR)
108         wined3d_surface_set_mem(surface, NULL);
109     if (surface->overlay_dest)
110         list_remove(&surface->overlay_entry);
111
112     HeapFree(GetProcessHeap(), 0, surface->palette9);
113
114     resource_cleanup(&surface->resource);
115 }
116
117 void surface_update_draw_binding(struct wined3d_surface *surface)
118 {
119     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
120         surface->draw_binding = SFLAG_INDRAWABLE;
121     else if (surface->resource.multisample_type)
122         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
123     else
124         surface->draw_binding = SFLAG_INTEXTURE;
125 }
126
127 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
128 {
129     TRACE("surface %p, container %p.\n", surface, container);
130
131     if (!container && type != WINED3D_CONTAINER_NONE)
132         ERR("Setting NULL container of type %#x.\n", type);
133
134     if (type == WINED3D_CONTAINER_SWAPCHAIN)
135     {
136         surface->get_drawable_size = get_drawable_size_swapchain;
137     }
138     else
139     {
140         switch (wined3d_settings.offscreen_rendering_mode)
141         {
142             case ORM_FBO:
143                 surface->get_drawable_size = get_drawable_size_fbo;
144                 break;
145
146             case ORM_BACKBUFFER:
147                 surface->get_drawable_size = get_drawable_size_backbuffer;
148                 break;
149
150             default:
151                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
152                 return;
153         }
154     }
155
156     surface->container.type = type;
157     surface->container.u.base = container;
158     surface_update_draw_binding(surface);
159 }
160
/* Texture binding information and texture coordinates for drawing a
 * rectangle of a surface as a textured quad; filled in by
 * surface_get_blt_info() and consumed by draw_textured_quad(). */
struct blt_info
{
    GLenum binding;             /* GL_TEXTURE_BINDING_* enum matching bind_target. */
    GLenum bind_target;         /* Target used to enable / bind the texture and set its parameters. */
    enum tex_types tex_type;    /* tex_2d, tex_rect or tex_cube. */
    GLfloat coords[4][3];       /* One 3-component texture coordinate per quad vertex, in the order
                                 * (left, top), (right, top), (left, bottom), (right, bottom). */
};
168
/* Rectangle with floating point edges; cube_coords_float() stores the
 * rescaled left / top / right / bottom edges of a RECT here. */
struct float_rect
{
    float l;    /* left */
    float t;    /* top */
    float r;    /* right */
    float b;    /* bottom */
};
176
177 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
178 {
179     f->l = ((r->left * 2.0f) / w) - 1.0f;
180     f->t = ((r->top * 2.0f) / h) - 1.0f;
181     f->r = ((r->right * 2.0f) / w) - 1.0f;
182     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
183 }
184
185 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
186 {
187     GLfloat (*coords)[3] = info->coords;
188     struct float_rect f;
189
190     switch (target)
191     {
192         default:
193             FIXME("Unsupported texture target %#x\n", target);
194             /* Fall back to GL_TEXTURE_2D */
195         case GL_TEXTURE_2D:
196             info->binding = GL_TEXTURE_BINDING_2D;
197             info->bind_target = GL_TEXTURE_2D;
198             info->tex_type = tex_2d;
199             coords[0][0] = (float)rect->left / w;
200             coords[0][1] = (float)rect->top / h;
201             coords[0][2] = 0.0f;
202
203             coords[1][0] = (float)rect->right / w;
204             coords[1][1] = (float)rect->top / h;
205             coords[1][2] = 0.0f;
206
207             coords[2][0] = (float)rect->left / w;
208             coords[2][1] = (float)rect->bottom / h;
209             coords[2][2] = 0.0f;
210
211             coords[3][0] = (float)rect->right / w;
212             coords[3][1] = (float)rect->bottom / h;
213             coords[3][2] = 0.0f;
214             break;
215
216         case GL_TEXTURE_RECTANGLE_ARB:
217             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
218             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
219             info->tex_type = tex_rect;
220             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
221             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
222             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
223             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
224             break;
225
226         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
227             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
228             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
229             info->tex_type = tex_cube;
230             cube_coords_float(rect, w, h, &f);
231
232             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
233             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
234             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
235             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
236             break;
237
238         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
239             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
240             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
241             info->tex_type = tex_cube;
242             cube_coords_float(rect, w, h, &f);
243
244             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
245             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
246             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
247             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
248             break;
249
250         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
251             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
252             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
253             info->tex_type = tex_cube;
254             cube_coords_float(rect, w, h, &f);
255
256             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
257             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
258             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
259             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
260             break;
261
262         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
263             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
264             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
265             info->tex_type = tex_cube;
266             cube_coords_float(rect, w, h, &f);
267
268             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
269             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
270             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
271             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
272             break;
273
274         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
275             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
276             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
277             info->tex_type = tex_cube;
278             cube_coords_float(rect, w, h, &f);
279
280             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
281             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
282             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
283             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
284             break;
285
286         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
287             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
288             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
289             info->tex_type = tex_cube;
290             cube_coords_float(rect, w, h, &f);
291
292             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
293             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
294             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
295             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
296             break;
297     }
298 }
299
300 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
301 {
302     if (rect_in)
303         *rect_out = *rect_in;
304     else
305     {
306         rect_out->left = 0;
307         rect_out->top = 0;
308         rect_out->right = surface->resource.width;
309         rect_out->bottom = surface->resource.height;
310     }
311 }
312
313 /* GL locking and context activation is done by the caller */
314 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
315         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
316 {
317     struct blt_info info;
318
319     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
320
321     glEnable(info.bind_target);
322     checkGLcall("glEnable(bind_target)");
323
324     context_bind_texture(context, info.bind_target, src_surface->texture_name);
325
326     /* Filtering for StretchRect */
327     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
328             wined3d_gl_mag_filter(magLookup, Filter));
329     checkGLcall("glTexParameteri");
330     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
331             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
334     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
335     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
336         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
337     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
338     checkGLcall("glTexEnvi");
339
340     /* Draw a quad */
341     glBegin(GL_TRIANGLE_STRIP);
342     glTexCoord3fv(info.coords[0]);
343     glVertex2i(dst_rect->left, dst_rect->top);
344
345     glTexCoord3fv(info.coords[1]);
346     glVertex2i(dst_rect->right, dst_rect->top);
347
348     glTexCoord3fv(info.coords[2]);
349     glVertex2i(dst_rect->left, dst_rect->bottom);
350
351     glTexCoord3fv(info.coords[3]);
352     glVertex2i(dst_rect->right, dst_rect->bottom);
353     glEnd();
354
355     /* Unbind the texture */
356     context_bind_texture(context, info.bind_target, 0);
357
358     /* We changed the filtering settings on the texture. Inform the
359      * container about this to get the filters reset properly next draw. */
360     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
361     {
362         struct wined3d_texture *texture = src_surface->container.u.texture;
363         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
364         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
365         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
366         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
367     }
368 }
369
370 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
371 {
372     const struct wined3d_format *format = surface->resource.format;
373     SYSTEM_INFO sysInfo;
374     BITMAPINFO *b_info;
375     int extraline = 0;
376     DWORD *masks;
377     UINT usage;
378     HDC dc;
379
380     TRACE("surface %p.\n", surface);
381
382     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
383     {
384         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
385         return WINED3DERR_INVALIDCALL;
386     }
387
388     switch (format->byte_count)
389     {
390         case 2:
391         case 4:
392             /* Allocate extra space to store the RGB bit masks. */
393             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
394             break;
395
396         case 3:
397             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
398             break;
399
400         default:
401             /* Allocate extra space for a palette. */
402             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
403                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
404             break;
405     }
406
407     if (!b_info)
408         return E_OUTOFMEMORY;
409
410     /* Some applications access the surface in via DWORDs, and do not take
411      * the necessary care at the end of the surface. So we need at least
412      * 4 extra bytes at the end of the surface. Check against the page size,
413      * if the last page used for the surface has at least 4 spare bytes we're
414      * safe, otherwise add an extra line to the DIB section. */
415     GetSystemInfo(&sysInfo);
416     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
417     {
418         extraline = 1;
419         TRACE("Adding an extra line to the DIB section.\n");
420     }
421
422     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
423     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
424     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
425     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
426     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
427             * wined3d_surface_get_pitch(surface);
428     b_info->bmiHeader.biPlanes = 1;
429     b_info->bmiHeader.biBitCount = format->byte_count * 8;
430
431     b_info->bmiHeader.biXPelsPerMeter = 0;
432     b_info->bmiHeader.biYPelsPerMeter = 0;
433     b_info->bmiHeader.biClrUsed = 0;
434     b_info->bmiHeader.biClrImportant = 0;
435
436     /* Get the bit masks */
437     masks = (DWORD *)b_info->bmiColors;
438     switch (surface->resource.format->id)
439     {
440         case WINED3DFMT_B8G8R8_UNORM:
441             usage = DIB_RGB_COLORS;
442             b_info->bmiHeader.biCompression = BI_RGB;
443             break;
444
445         case WINED3DFMT_B5G5R5X1_UNORM:
446         case WINED3DFMT_B5G5R5A1_UNORM:
447         case WINED3DFMT_B4G4R4A4_UNORM:
448         case WINED3DFMT_B4G4R4X4_UNORM:
449         case WINED3DFMT_B2G3R3_UNORM:
450         case WINED3DFMT_B2G3R3A8_UNORM:
451         case WINED3DFMT_R10G10B10A2_UNORM:
452         case WINED3DFMT_R8G8B8A8_UNORM:
453         case WINED3DFMT_R8G8B8X8_UNORM:
454         case WINED3DFMT_B10G10R10A2_UNORM:
455         case WINED3DFMT_B5G6R5_UNORM:
456         case WINED3DFMT_R16G16B16A16_UNORM:
457             usage = 0;
458             b_info->bmiHeader.biCompression = BI_BITFIELDS;
459             masks[0] = format->red_mask;
460             masks[1] = format->green_mask;
461             masks[2] = format->blue_mask;
462             break;
463
464         default:
465             /* Don't know palette */
466             b_info->bmiHeader.biCompression = BI_RGB;
467             usage = 0;
468             break;
469     }
470
471     if (!(dc = GetDC(0)))
472     {
473         HeapFree(GetProcessHeap(), 0, b_info);
474         return HRESULT_FROM_WIN32(GetLastError());
475     }
476
477     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
478             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
479             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
480     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
481     ReleaseDC(0, dc);
482
483     if (!surface->dib.DIBsection)
484     {
485         ERR("Failed to create DIB section.\n");
486         HeapFree(GetProcessHeap(), 0, b_info);
487         return HRESULT_FROM_WIN32(GetLastError());
488     }
489
490     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
491     /* Copy the existing surface to the dib section. */
492     if (surface->resource.allocatedMemory)
493     {
494         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
495                 surface->resource.height * wined3d_surface_get_pitch(surface));
496     }
497     else
498     {
499         /* This is to make maps read the GL texture although memory is allocated. */
500         surface->flags &= ~SFLAG_INSYSMEM;
501     }
502     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
503
504     HeapFree(GetProcessHeap(), 0, b_info);
505
506     /* Now allocate a DC. */
507     surface->hDC = CreateCompatibleDC(0);
508     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
509     TRACE("Using wined3d palette %p.\n", surface->palette);
510     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
511
512     surface->flags |= SFLAG_DIBSECTION;
513
514     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
515     surface->resource.heapMemory = NULL;
516
517     return WINED3D_OK;
518 }
519
520 static void surface_prepare_system_memory(struct wined3d_surface *surface)
521 {
522     struct wined3d_device *device = surface->resource.device;
523     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
524
525     TRACE("surface %p.\n", surface);
526
527     /* Performance optimization: Count how often a surface is locked, if it is
528      * locked regularly do not throw away the system memory copy. This avoids
529      * the need to download the surface from OpenGL all the time. The surface
530      * is still downloaded if the OpenGL texture is changed. */
531     if (!(surface->flags & SFLAG_DYNLOCK))
532     {
533         if (++surface->lockCount > MAXLOCKCOUNT)
534         {
535             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
536             surface->flags |= SFLAG_DYNLOCK;
537         }
538     }
539
540     /* Create a PBO for dynamically locked surfaces but don't do it for
541      * converted or NPOT surfaces. Also don't create a PBO for systemmem
542      * surfaces. */
543     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
544             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
545             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
546     {
547         struct wined3d_context *context;
548         GLenum error;
549
550         context = context_acquire(device, NULL);
551         ENTER_GL();
552
553         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
554         error = glGetError();
555         if (!surface->pbo || error != GL_NO_ERROR)
556             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
557
558         TRACE("Binding PBO %u.\n", surface->pbo);
559
560         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
561         checkGLcall("glBindBufferARB");
562
563         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
564                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
565         checkGLcall("glBufferDataARB");
566
567         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
568         checkGLcall("glBindBufferARB");
569
570         /* We don't need the system memory anymore and we can't even use it for PBOs. */
571         if (!(surface->flags & SFLAG_CLIENT))
572         {
573             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
574             surface->resource.heapMemory = NULL;
575         }
576         surface->resource.allocatedMemory = NULL;
577         surface->flags |= SFLAG_PBO;
578         LEAVE_GL();
579         context_release(context);
580     }
581     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
582     {
583         /* Whatever surface we have, make sure that there is memory allocated
584          * for the downloaded copy, or a PBO to map. */
585         if (!surface->resource.heapMemory)
586             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
587
588         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
589                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
590
591         if (surface->flags & SFLAG_INSYSMEM)
592             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
593     }
594 }
595
596 static void surface_evict_sysmem(struct wined3d_surface *surface)
597 {
598     if (surface->flags & SFLAG_DONOTFREE)
599         return;
600
601     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
602     surface->resource.allocatedMemory = NULL;
603     surface->resource.heapMemory = NULL;
604     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
605 }
606
607 /* Context activation is done by the caller. */
608 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
609         struct wined3d_context *context, BOOL srgb)
610 {
611     struct wined3d_device *device = surface->resource.device;
612     DWORD active_sampler;
613
614     /* We don't need a specific texture unit, but after binding the texture
615      * the current unit is dirty. Read the unit back instead of switching to
616      * 0, this avoids messing around with the state manager's GL states. The
617      * current texture unit should always be a valid one.
618      *
619      * To be more specific, this is tricky because we can implicitly be
620      * called from sampler() in state.c. This means we can't touch anything
621      * other than whatever happens to be the currently active texture, or we
622      * would risk marking already applied sampler states dirty again. */
623     active_sampler = device->rev_tex_unit_map[context->active_texture];
624
625     if (active_sampler != WINED3D_UNMAPPED_STAGE)
626         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
627     surface_bind(surface, context, srgb);
628 }
629
630 static void surface_force_reload(struct wined3d_surface *surface)
631 {
632     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
633 }
634
635 static void surface_release_client_storage(struct wined3d_surface *surface)
636 {
637     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
638
639     ENTER_GL();
640     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
641     if (surface->texture_name)
642     {
643         surface_bind_and_dirtify(surface, context, FALSE);
644         glTexImage2D(surface->texture_target, surface->texture_level,
645                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
646     }
647     if (surface->texture_name_srgb)
648     {
649         surface_bind_and_dirtify(surface, context, TRUE);
650         glTexImage2D(surface->texture_target, surface->texture_level,
651                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
652     }
653     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
654     LEAVE_GL();
655
656     context_release(context);
657
658     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
659     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
660     surface_force_reload(surface);
661 }
662
663 static HRESULT surface_private_setup(struct wined3d_surface *surface)
664 {
665     /* TODO: Check against the maximum texture sizes supported by the video card. */
666     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
667     unsigned int pow2Width, pow2Height;
668
669     TRACE("surface %p.\n", surface);
670
671     surface->texture_name = 0;
672     surface->texture_target = GL_TEXTURE_2D;
673
674     /* Non-power2 support */
675     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
676     {
677         pow2Width = surface->resource.width;
678         pow2Height = surface->resource.height;
679     }
680     else
681     {
682         /* Find the nearest pow2 match */
683         pow2Width = pow2Height = 1;
684         while (pow2Width < surface->resource.width)
685             pow2Width <<= 1;
686         while (pow2Height < surface->resource.height)
687             pow2Height <<= 1;
688     }
689     surface->pow2Width = pow2Width;
690     surface->pow2Height = pow2Height;
691
692     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
693     {
694         /* TODO: Add support for non power two compressed textures. */
695         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
696         {
697             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
698                   surface, surface->resource.width, surface->resource.height);
699             return WINED3DERR_NOTAVAILABLE;
700         }
701     }
702
703     if (pow2Width != surface->resource.width
704             || pow2Height != surface->resource.height)
705     {
706         surface->flags |= SFLAG_NONPOW2;
707     }
708
709     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
710             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
711     {
712         /* One of three options:
713          * 1: Do the same as we do with NPOT and scale the texture, (any
714          *    texture ops would require the texture to be scaled which is
715          *    potentially slow)
716          * 2: Set the texture to the maximum size (bad idea).
717          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
718          * 4: Create the surface, but allow it to be used only for DirectDraw
719          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
720          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
721          *    the render target. */
722         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
723         {
724             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
725             return WINED3DERR_NOTAVAILABLE;
726         }
727
728         /* We should never use this surface in combination with OpenGL! */
729         TRACE("Creating an oversized surface: %ux%u.\n",
730                 surface->pow2Width, surface->pow2Height);
731     }
732     else
733     {
734         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
735          * and EXT_PALETTED_TEXTURE is used in combination with texture
736          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
737          * EXT_PALETTED_TEXTURE doesn't work in combination with
738          * ARB_TEXTURE_RECTANGLE. */
739         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
740                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
741                 && gl_info->supported[EXT_PALETTED_TEXTURE]
742                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
743         {
744             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
745             surface->pow2Width = surface->resource.width;
746             surface->pow2Height = surface->resource.height;
747             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
748         }
749     }
750
751     switch (wined3d_settings.offscreen_rendering_mode)
752     {
753         case ORM_FBO:
754             surface->get_drawable_size = get_drawable_size_fbo;
755             break;
756
757         case ORM_BACKBUFFER:
758             surface->get_drawable_size = get_drawable_size_backbuffer;
759             break;
760
761         default:
762             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
763             return WINED3DERR_INVALIDCALL;
764     }
765
766     surface->flags |= SFLAG_INSYSMEM;
767
768     return WINED3D_OK;
769 }
770
771 static void surface_realize_palette(struct wined3d_surface *surface)
772 {
773     struct wined3d_palette *palette = surface->palette;
774
775     TRACE("surface %p.\n", surface);
776
777     if (!palette) return;
778
779     if (surface->resource.format->id == WINED3DFMT_P8_UINT
780             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
781     {
782         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
783         {
784             /* Make sure the texture is up to date. This call doesn't do
785              * anything if the texture is already up to date. */
786             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
787
788             /* We want to force a palette refresh, so mark the drawable as not being up to date */
789             if (!surface_is_offscreen(surface))
790                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
791         }
792         else
793         {
794             if (!(surface->flags & SFLAG_INSYSMEM))
795             {
796                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
797                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
798             }
799             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
800         }
801     }
802
803     if (surface->flags & SFLAG_DIBSECTION)
804     {
805         RGBQUAD col[256];
806         unsigned int i;
807
808         TRACE("Updating the DC's palette.\n");
809
810         for (i = 0; i < 256; ++i)
811         {
812             col[i].rgbRed   = palette->palents[i].peRed;
813             col[i].rgbGreen = palette->palents[i].peGreen;
814             col[i].rgbBlue  = palette->palents[i].peBlue;
815             col[i].rgbReserved = 0;
816         }
817         SetDIBColorTable(surface->hDC, 0, 256, col);
818     }
819
820     /* Propagate the changes to the drawable when we have a palette. */
821     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
822         surface_load_location(surface, surface->draw_binding, NULL);
823 }
824
825 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
826 {
827     HRESULT hr;
828
829     /* If there's no destination surface there is nothing to do. */
830     if (!surface->overlay_dest)
831         return WINED3D_OK;
832
833     /* Blt calls ModifyLocation on the dest surface, which in turn calls
834      * DrawOverlay to update the overlay. Prevent an endless recursion. */
835     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
836         return WINED3D_OK;
837
838     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
839     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
840             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
841     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
842
843     return hr;
844 }
845
846 static void surface_preload(struct wined3d_surface *surface)
847 {
848     TRACE("surface %p.\n", surface);
849
850     surface_internal_preload(surface, SRGB_ANY);
851 }
852
853 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
854 {
855     struct wined3d_device *device = surface->resource.device;
856     const RECT *pass_rect = rect;
857
858     TRACE("surface %p, rect %s, flags %#x.\n",
859             surface, wine_dbgstr_rect(rect), flags);
860
861     if (flags & WINED3DLOCK_DISCARD)
862     {
863         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
864         surface_prepare_system_memory(surface);
865         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
866     }
867     else
868     {
869         /* surface_load_location() does not check if the rectangle specifies
870          * the full surface. Most callers don't need that, so do it here. */
871         if (rect && !rect->top && !rect->left
872                 && rect->right == surface->resource.width
873                 && rect->bottom == surface->resource.height)
874             pass_rect = NULL;
875
876         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
877                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
878                 || surface == device->fb.render_targets[0])))
879             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
880     }
881
882     if (surface->flags & SFLAG_PBO)
883     {
884         const struct wined3d_gl_info *gl_info;
885         struct wined3d_context *context;
886
887         context = context_acquire(device, NULL);
888         gl_info = context->gl_info;
889
890         ENTER_GL();
891         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
892         checkGLcall("glBindBufferARB");
893
894         /* This shouldn't happen but could occur if some other function
895          * didn't handle the PBO properly. */
896         if (surface->resource.allocatedMemory)
897             ERR("The surface already has PBO memory allocated.\n");
898
899         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
900         checkGLcall("glMapBufferARB");
901
902         /* Make sure the PBO isn't set anymore in order not to break non-PBO
903          * calls. */
904         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
905         checkGLcall("glBindBufferARB");
906
907         LEAVE_GL();
908         context_release(context);
909     }
910
911     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
912     {
913         if (!rect)
914             surface_add_dirty_rect(surface, NULL);
915         else
916         {
917             WINED3DBOX b;
918
919             b.Left = rect->left;
920             b.Top = rect->top;
921             b.Right = rect->right;
922             b.Bottom = rect->bottom;
923             b.Front = 0;
924             b.Back = 1;
925             surface_add_dirty_rect(surface, &b);
926         }
927     }
928 }
929
930 static void surface_unmap(struct wined3d_surface *surface)
931 {
932     struct wined3d_device *device = surface->resource.device;
933     BOOL fullsurface;
934
935     TRACE("surface %p.\n", surface);
936
937     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
938
939     if (surface->flags & SFLAG_PBO)
940     {
941         const struct wined3d_gl_info *gl_info;
942         struct wined3d_context *context;
943
944         TRACE("Freeing PBO memory.\n");
945
946         context = context_acquire(device, NULL);
947         gl_info = context->gl_info;
948
949         ENTER_GL();
950         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
951         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
952         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
953         checkGLcall("glUnmapBufferARB");
954         LEAVE_GL();
955         context_release(context);
956
957         surface->resource.allocatedMemory = NULL;
958     }
959
960     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
961
962     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
963     {
964         TRACE("Not dirtified, nothing to do.\n");
965         goto done;
966     }
967
968     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
969             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
970     {
971         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
972         {
973             static BOOL warned = FALSE;
974             if (!warned)
975             {
976                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
977                 warned = TRUE;
978             }
979             goto done;
980         }
981
982         if (!surface->dirtyRect.left && !surface->dirtyRect.top
983                 && surface->dirtyRect.right == surface->resource.width
984                 && surface->dirtyRect.bottom == surface->resource.height)
985         {
986             fullsurface = TRUE;
987         }
988         else
989         {
990             /* TODO: Proper partial rectangle tracking. */
991             fullsurface = FALSE;
992             surface->flags |= SFLAG_INSYSMEM;
993         }
994
995         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
996
997         /* Partial rectangle tracking is not commonly implemented, it is only
998          * done for render targets. INSYSMEM was set before to tell
999          * surface_load_location() where to read the rectangle from.
1000          * Indrawable is set because all modifications from the partial
1001          * sysmem copy are written back to the drawable, thus the surface is
1002          * merged again in the drawable. The sysmem copy is not fully up to
1003          * date because only a subrectangle was read in Map(). */
1004         if (!fullsurface)
1005         {
1006             surface_modify_location(surface, surface->draw_binding, TRUE);
1007             surface_evict_sysmem(surface);
1008         }
1009
1010         surface->dirtyRect.left = surface->resource.width;
1011         surface->dirtyRect.top = surface->resource.height;
1012         surface->dirtyRect.right = 0;
1013         surface->dirtyRect.bottom = 0;
1014     }
1015     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1016     {
1017         FIXME("Depth / stencil buffer locking is not implemented.\n");
1018     }
1019
1020 done:
1021     /* Overlays have to be redrawn manually after changes with the GL implementation */
1022     if (surface->overlay_dest)
1023         surface->surface_ops->surface_draw_overlay(surface);
1024 }
1025
1026 static HRESULT surface_getdc(struct wined3d_surface *surface)
1027 {
1028     WINED3DLOCKED_RECT lock;
1029     HRESULT hr;
1030
1031     TRACE("surface %p.\n", surface);
1032
1033     /* Create a DIB section if there isn't a dc yet. */
1034     if (!surface->hDC)
1035     {
1036         if (surface->flags & SFLAG_CLIENT)
1037         {
1038             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1039             surface_release_client_storage(surface);
1040         }
1041         hr = surface_create_dib_section(surface);
1042         if (FAILED(hr))
1043             return WINED3DERR_INVALIDCALL;
1044
1045         /* Use the DIB section from now on if we are not using a PBO. */
1046         if (!(surface->flags & SFLAG_PBO))
1047             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1048     }
1049
1050     /* Map the surface. */
1051     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1052     if (FAILED(hr))
1053         ERR("Map failed, hr %#x.\n", hr);
1054
1055     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1056      * activates the allocatedMemory. */
1057     if (surface->flags & SFLAG_PBO)
1058         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
1059
1060     return hr;
1061 }
1062
1063 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1064 {
1065     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1066         return FALSE;
1067     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1068         return FALSE;
1069     return TRUE;
1070 }
1071
1072 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1073         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1074 {
1075     const struct wined3d_gl_info *gl_info;
1076     struct wined3d_context *context;
1077     DWORD src_mask, dst_mask;
1078     GLbitfield gl_mask;
1079
1080     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1081             device, src_surface, wine_dbgstr_rect(src_rect),
1082             dst_surface, wine_dbgstr_rect(dst_rect));
1083
1084     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1085     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1086
1087     if (src_mask != dst_mask)
1088     {
1089         ERR("Incompatible formats %s and %s.\n",
1090                 debug_d3dformat(src_surface->resource.format->id),
1091                 debug_d3dformat(dst_surface->resource.format->id));
1092         return;
1093     }
1094
1095     if (!src_mask)
1096     {
1097         ERR("Not a depth / stencil format: %s.\n",
1098                 debug_d3dformat(src_surface->resource.format->id));
1099         return;
1100     }
1101
1102     gl_mask = 0;
1103     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1104         gl_mask |= GL_DEPTH_BUFFER_BIT;
1105     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1106         gl_mask |= GL_STENCIL_BUFFER_BIT;
1107
1108     /* Make sure the locations are up-to-date. Loading the destination
1109      * surface isn't required if the entire surface is overwritten. */
1110     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1111     if (!surface_is_full_rect(dst_surface, dst_rect))
1112         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1113
1114     context = context_acquire(device, NULL);
1115     if (!context->valid)
1116     {
1117         context_release(context);
1118         WARN("Invalid context, skipping blit.\n");
1119         return;
1120     }
1121
1122     gl_info = context->gl_info;
1123
1124     ENTER_GL();
1125
1126     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1127     glReadBuffer(GL_NONE);
1128     checkGLcall("glReadBuffer()");
1129     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1130
1131     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1132     context_set_draw_buffer(context, GL_NONE);
1133     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1134
1135     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1136     {
1137         glDepthMask(GL_TRUE);
1138         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1139     }
1140     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1141     {
1142         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1143         {
1144             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1145             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1146         }
1147         glStencilMask(~0U);
1148         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1149     }
1150
1151     glDisable(GL_SCISSOR_TEST);
1152     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1153
1154     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1155             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1156     checkGLcall("glBlitFramebuffer()");
1157
1158     LEAVE_GL();
1159
1160     if (wined3d_settings.strict_draw_ordering)
1161         wglFlush(); /* Flush to ensure ordering across contexts. */
1162
1163     context_release(context);
1164 }
1165
1166 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1167  * Depth / stencil is not supported. */
1168 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1169         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1170         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1171 {
1172     const struct wined3d_gl_info *gl_info;
1173     struct wined3d_context *context;
1174     RECT src_rect, dst_rect;
1175     GLenum gl_filter;
1176     GLenum buffer;
1177
1178     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1179     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1180             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1181     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1182             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1183
1184     src_rect = *src_rect_in;
1185     dst_rect = *dst_rect_in;
1186
1187     switch (filter)
1188     {
1189         case WINED3DTEXF_LINEAR:
1190             gl_filter = GL_LINEAR;
1191             break;
1192
1193         default:
1194             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1195         case WINED3DTEXF_NONE:
1196         case WINED3DTEXF_POINT:
1197             gl_filter = GL_NEAREST;
1198             break;
1199     }
1200
1201     /* Resolve the source surface first if needed. */
1202     if (src_location == SFLAG_INRB_MULTISAMPLE
1203             && (src_surface->resource.format->id != dst_surface->resource.format->id
1204                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1205                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1206         src_location = SFLAG_INRB_RESOLVED;
1207
1208     /* Make sure the locations are up-to-date. Loading the destination
1209      * surface isn't required if the entire surface is overwritten. (And is
1210      * in fact harmful if we're being called by surface_load_location() with
1211      * the purpose of loading the destination surface.) */
1212     surface_load_location(src_surface, src_location, NULL);
1213     if (!surface_is_full_rect(dst_surface, &dst_rect))
1214         surface_load_location(dst_surface, dst_location, NULL);
1215
1216     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1217     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1218     else context = context_acquire(device, NULL);
1219
1220     if (!context->valid)
1221     {
1222         context_release(context);
1223         WARN("Invalid context, skipping blit.\n");
1224         return;
1225     }
1226
1227     gl_info = context->gl_info;
1228
1229     if (src_location == SFLAG_INDRAWABLE)
1230     {
1231         TRACE("Source surface %p is onscreen.\n", src_surface);
1232         buffer = surface_get_gl_buffer(src_surface);
1233         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1234     }
1235     else
1236     {
1237         TRACE("Source surface %p is offscreen.\n", src_surface);
1238         buffer = GL_COLOR_ATTACHMENT0;
1239     }
1240
1241     ENTER_GL();
1242     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1243     glReadBuffer(buffer);
1244     checkGLcall("glReadBuffer()");
1245     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1246     LEAVE_GL();
1247
1248     if (dst_location == SFLAG_INDRAWABLE)
1249     {
1250         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1251         buffer = surface_get_gl_buffer(dst_surface);
1252         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1253     }
1254     else
1255     {
1256         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1257         buffer = GL_COLOR_ATTACHMENT0;
1258     }
1259
1260     ENTER_GL();
1261     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1262     context_set_draw_buffer(context, buffer);
1263     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1264     context_invalidate_state(context, STATE_FRAMEBUFFER);
1265
1266     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1267     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1268     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1269     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1270     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1271
1272     glDisable(GL_SCISSOR_TEST);
1273     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1274
1275     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1276             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1277     checkGLcall("glBlitFramebuffer()");
1278
1279     LEAVE_GL();
1280
1281     if (wined3d_settings.strict_draw_ordering
1282             || (dst_location == SFLAG_INDRAWABLE
1283             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1284         wglFlush();
1285
1286     context_release(context);
1287 }
1288
1289 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1290         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1291         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1292 {
1293     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1294         return FALSE;
1295
1296     /* Source and/or destination need to be on the GL side */
1297     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1298         return FALSE;
1299
1300     switch (blit_op)
1301     {
1302         case WINED3D_BLIT_OP_COLOR_BLIT:
1303             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1304                 return FALSE;
1305             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1306                 return FALSE;
1307             break;
1308
1309         case WINED3D_BLIT_OP_DEPTH_BLIT:
1310             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1311                 return FALSE;
1312             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1313                 return FALSE;
1314             break;
1315
1316         default:
1317             return FALSE;
1318     }
1319
1320     if (!(src_format->id == dst_format->id
1321             || (is_identity_fixup(src_format->color_fixup)
1322             && is_identity_fixup(dst_format->color_fixup))))
1323         return FALSE;
1324
1325     return TRUE;
1326 }
1327
1328 /* This function checks if the primary render target uses the 8bit paletted format. */
1329 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1330 {
1331     if (device->fb.render_targets && device->fb.render_targets[0])
1332     {
1333         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1334         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1335                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1336             return TRUE;
1337     }
1338     return FALSE;
1339 }
1340
1341 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1342         DWORD color, WINED3DCOLORVALUE *float_color)
1343 {
1344     const struct wined3d_format *format = surface->resource.format;
1345     const struct wined3d_device *device = surface->resource.device;
1346
1347     switch (format->id)
1348     {
1349         case WINED3DFMT_P8_UINT:
1350             if (surface->palette)
1351             {
1352                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1353                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1354                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1355             }
1356             else
1357             {
1358                 float_color->r = 0.0f;
1359                 float_color->g = 0.0f;
1360                 float_color->b = 0.0f;
1361             }
1362             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1363             break;
1364
1365         case WINED3DFMT_B5G6R5_UNORM:
1366             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1367             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1368             float_color->b = (color & 0x1f) / 31.0f;
1369             float_color->a = 1.0f;
1370             break;
1371
1372         case WINED3DFMT_B8G8R8_UNORM:
1373         case WINED3DFMT_B8G8R8X8_UNORM:
1374             float_color->r = D3DCOLOR_R(color);
1375             float_color->g = D3DCOLOR_G(color);
1376             float_color->b = D3DCOLOR_B(color);
1377             float_color->a = 1.0f;
1378             break;
1379
1380         case WINED3DFMT_B8G8R8A8_UNORM:
1381             float_color->r = D3DCOLOR_R(color);
1382             float_color->g = D3DCOLOR_G(color);
1383             float_color->b = D3DCOLOR_B(color);
1384             float_color->a = D3DCOLOR_A(color);
1385             break;
1386
1387         default:
1388             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1389             return FALSE;
1390     }
1391
1392     return TRUE;
1393 }
1394
1395 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1396 {
1397     const struct wined3d_format *format = surface->resource.format;
1398
1399     switch (format->id)
1400     {
1401         case WINED3DFMT_S1_UINT_D15_UNORM:
1402             *float_depth = depth / (float)0x00007fff;
1403             break;
1404
1405         case WINED3DFMT_D16_UNORM:
1406             *float_depth = depth / (float)0x0000ffff;
1407             break;
1408
1409         case WINED3DFMT_D24_UNORM_S8_UINT:
1410         case WINED3DFMT_X8D24_UNORM:
1411             *float_depth = depth / (float)0x00ffffff;
1412             break;
1413
1414         case WINED3DFMT_D32_UNORM:
1415             *float_depth = depth / (float)0xffffffff;
1416             break;
1417
1418         default:
1419             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1420             return FALSE;
1421     }
1422
1423     return TRUE;
1424 }
1425
1426 /* Do not call while under the GL lock. */
1427 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1428 {
1429     const struct wined3d_resource *resource = &surface->resource;
1430     struct wined3d_device *device = resource->device;
1431     const struct blit_shader *blitter;
1432
1433     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1434             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1435     if (!blitter)
1436     {
1437         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1438         return WINED3DERR_INVALIDCALL;
1439     }
1440
1441     return blitter->depth_fill(device, surface, rect, depth);
1442 }
1443
1444 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1445         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1446 {
1447     struct wined3d_device *device = src_surface->resource.device;
1448
1449     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1450             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1451             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1452         return WINED3DERR_INVALIDCALL;
1453
1454     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1455
1456     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1457             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1458     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1459
1460     return WINED3D_OK;
1461 }
1462
1463 /* Do not call while under the GL lock. */
1464 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1465         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1466         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1467 {
1468     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1469     struct wined3d_device *device = dst_surface->resource.device;
1470     DWORD src_ds_flags, dst_ds_flags;
1471     RECT src_rect, dst_rect;
1472
1473     static const DWORD simple_blit = WINEDDBLT_ASYNC
1474             | WINEDDBLT_COLORFILL
1475             | WINEDDBLT_WAIT
1476             | WINEDDBLT_DEPTHFILL
1477             | WINEDDBLT_DONOTWAIT;
1478
1479     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1480             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1481             flags, fx, debug_d3dtexturefiltertype(filter));
1482     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1483
1484     if (fx)
1485     {
1486         TRACE("dwSize %#x.\n", fx->dwSize);
1487         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1488         TRACE("dwROP %#x.\n", fx->dwROP);
1489         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1490         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1491         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1492         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1493         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1494         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1495         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1496         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1497         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1498         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1499         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1500         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1501         TRACE("dwReserved %#x.\n", fx->dwReserved);
1502         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1503         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1504         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1505         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1506         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1507         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1508                 fx->ddckDestColorkey.dwColorSpaceLowValue,
1509                 fx->ddckDestColorkey.dwColorSpaceHighValue);
1510         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1511                 fx->ddckSrcColorkey.dwColorSpaceLowValue,
1512                 fx->ddckSrcColorkey.dwColorSpaceHighValue);
1513     }
1514
1515     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1516     {
1517         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1518         return WINEDDERR_SURFACEBUSY;
1519     }
1520
1521     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1522
1523     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1524             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1525             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1526             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1527             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1528     {
1529         /* The destination rect can be out of bounds on the condition
1530          * that a clipper is set for the surface. */
1531         if (dst_surface->clipper)
1532             FIXME("Blit clipping not implemented.\n");
1533         else
1534             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1535         return WINEDDERR_INVALIDRECT;
1536     }
1537
1538     if (src_surface)
1539     {
1540         surface_get_rect(src_surface, src_rect_in, &src_rect);
1541
1542         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1543                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1544                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1545                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1546                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1547         {
1548             WARN("Application gave us bad source rectangle for Blt.\n");
1549             return WINEDDERR_INVALIDRECT;
1550         }
1551     }
1552     else
1553     {
1554         memset(&src_rect, 0, sizeof(src_rect));
1555     }
1556
1557     if (!fx || !(fx->dwDDFX))
1558         flags &= ~WINEDDBLT_DDFX;
1559
1560     if (flags & WINEDDBLT_WAIT)
1561         flags &= ~WINEDDBLT_WAIT;
1562
1563     if (flags & WINEDDBLT_ASYNC)
1564     {
1565         static unsigned int once;
1566
1567         if (!once++)
1568             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1569         flags &= ~WINEDDBLT_ASYNC;
1570     }
1571
1572     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1573     if (flags & WINEDDBLT_DONOTWAIT)
1574     {
1575         static unsigned int once;
1576
1577         if (!once++)
1578             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1579         flags &= ~WINEDDBLT_DONOTWAIT;
1580     }
1581
1582     if (!device->d3d_initialized)
1583     {
1584         WARN("D3D not initialized, using fallback.\n");
1585         goto cpu;
1586     }
1587
1588     /* We want to avoid invalidating the sysmem location for converted
1589      * surfaces, since otherwise we'd have to convert the data back when
1590      * locking them. */
1591     if (dst_surface->flags & SFLAG_CONVERTED)
1592     {
1593         WARN("Converted surface, using CPU blit.\n");
1594         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1595     }
1596
1597     if (flags & ~simple_blit)
1598     {
1599         WARN("Using fallback for complex blit (%#x).\n", flags);
1600         goto fallback;
1601     }
1602
1603     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1604         src_swapchain = src_surface->container.u.swapchain;
1605     else
1606         src_swapchain = NULL;
1607
1608     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1609         dst_swapchain = dst_surface->container.u.swapchain;
1610     else
1611         dst_swapchain = NULL;
1612
1613     /* This isn't strictly needed. FBO blits for example could deal with
1614      * cross-swapchain blits by first downloading the source to a texture
1615      * before switching to the destination context. We just have this here to
1616      * not have to deal with the issue, since cross-swapchain blits should be
1617      * rare. */
1618     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1619     {
1620         FIXME("Using fallback for cross-swapchain blit.\n");
1621         goto fallback;
1622     }
1623
1624     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1625     if (src_surface)
1626         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1627     else
1628         src_ds_flags = 0;
1629
1630     if (src_ds_flags || dst_ds_flags)
1631     {
1632         if (flags & WINEDDBLT_DEPTHFILL)
1633         {
1634             float depth;
1635
1636             TRACE("Depth fill.\n");
1637
1638             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1639                 return WINED3DERR_INVALIDCALL;
1640
1641             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1642                 return WINED3D_OK;
1643         }
1644         else
1645         {
1646             /* Accessing depth / stencil surfaces is supposed to fail while in
1647              * a scene, except for fills, which seem to work. */
1648             if (device->inScene)
1649             {
1650                 WARN("Rejecting depth / stencil access while in scene.\n");
1651                 return WINED3DERR_INVALIDCALL;
1652             }
1653
1654             if (src_ds_flags != dst_ds_flags)
1655             {
1656                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1657                 return WINED3DERR_INVALIDCALL;
1658             }
1659
1660             if (src_rect.top || src_rect.left
1661                     || src_rect.bottom != src_surface->resource.height
1662                     || src_rect.right != src_surface->resource.width)
1663             {
1664                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1665                         wine_dbgstr_rect(&src_rect));
1666                 return WINED3DERR_INVALIDCALL;
1667             }
1668
1669             if (dst_rect.top || dst_rect.left
1670                     || dst_rect.bottom != dst_surface->resource.height
1671                     || dst_rect.right != dst_surface->resource.width)
1672             {
1673                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1674                         wine_dbgstr_rect(&src_rect));
1675                 return WINED3DERR_INVALIDCALL;
1676             }
1677
1678             if (src_surface->resource.height != dst_surface->resource.height
1679                     || src_surface->resource.width != dst_surface->resource.width)
1680             {
1681                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1682                 return WINED3DERR_INVALIDCALL;
1683             }
1684
1685             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1686                 return WINED3D_OK;
1687         }
1688     }
1689     else
1690     {
1691         if (flags & WINEDDBLT_COLORFILL)
1692         {
1693             WINED3DCOLORVALUE color;
1694
1695             TRACE("Color fill.\n");
1696
1697             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1698                 goto fallback;
1699
1700             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1701                 return WINED3D_OK;
1702         }
1703         else
1704         {
1705             TRACE("Color blit.\n");
1706
1707             /* Use present for back -> front blits. The idea behind this is
1708              * that present is potentially faster than a blit, in particular
1709              * when FBO blits aren't available. Some ddraw applications like
1710              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1711              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1712              * applications can't blit directly to the frontbuffer. */
1713             if (dst_swapchain && dst_swapchain->back_buffers
1714                     && dst_surface == dst_swapchain->front_buffer
1715                     && src_surface == dst_swapchain->back_buffers[0])
1716             {
1717                 WINED3DSWAPEFFECT swap_effect = dst_swapchain->presentParms.SwapEffect;
1718
1719                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1720
1721                 /* Set the swap effect to COPY, we don't want the backbuffer
1722                  * to become undefined. */
1723                 dst_swapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
1724                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1725                 dst_swapchain->presentParms.SwapEffect = swap_effect;
1726
1727                 return WINED3D_OK;
1728             }
1729
1730             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1731                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1732                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1733             {
1734                 TRACE("Using FBO blit.\n");
1735
1736                 surface_blt_fbo(device, filter,
1737                         src_surface, src_surface->draw_binding, &src_rect,
1738                         dst_surface, dst_surface->draw_binding, &dst_rect);
1739                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1740                 return WINED3D_OK;
1741             }
1742
1743             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1744                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1745                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1746             {
1747                 TRACE("Using arbfp blit.\n");
1748
1749                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1750                     return WINED3D_OK;
1751             }
1752         }
1753     }
1754
1755 fallback:
1756
1757     /* Special cases for render targets. */
1758     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1759             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1760     {
1761         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1762                 src_surface, &src_rect, flags, fx, filter)))
1763             return WINED3D_OK;
1764     }
1765
1766 cpu:
1767
1768     /* For the rest call the X11 surface implementation. For render targets
1769      * this should be implemented OpenGL accelerated in BltOverride, other
1770      * blits are rather rare. */
1771     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1772 }
1773
1774 /* Do not call while under the GL lock. */
1775 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1776         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1777 {
1778     RECT src_rect, dst_rect;
1779     DWORD flags = 0;
1780
1781     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect_in %s, trans %#x.\n",
1782             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1783
1784     surface_get_rect(src_surface, src_rect_in, &src_rect);
1785
1786     dst_rect.left = dst_x;
1787     dst_rect.top = dst_y;
1788     dst_rect.right = dst_x + src_rect.right - src_rect.left;
1789     dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1790
1791     if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1792         flags |= WINEDDBLT_KEYSRC;
1793     if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1794         flags |= WINEDDBLT_KEYDEST;
1795     if (trans & WINEDDBLTFAST_WAIT)
1796         flags |= WINEDDBLT_WAIT;
1797     if (trans & WINEDDBLTFAST_DONOTWAIT)
1798         flags |= WINEDDBLT_DONOTWAIT;
1799
1800     return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT);
1801 }
1802
1803 /* Context activation is done by the caller. */
1804 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1805 {
1806     if (!surface->resource.heapMemory)
1807     {
1808         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1809         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1810                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1811     }
1812
1813     ENTER_GL();
1814     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1815     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1816     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1817             surface->resource.size, surface->resource.allocatedMemory));
1818     checkGLcall("glGetBufferSubDataARB");
1819     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1820     checkGLcall("glDeleteBuffersARB");
1821     LEAVE_GL();
1822
1823     surface->pbo = 0;
1824     surface->flags &= ~SFLAG_PBO;
1825 }
1826
1827 /* Do not call while under the GL lock. */
1828 static void surface_unload(struct wined3d_resource *resource)
1829 {
1830     struct wined3d_surface *surface = surface_from_resource(resource);
1831     struct wined3d_renderbuffer_entry *entry, *entry2;
1832     struct wined3d_device *device = resource->device;
1833     const struct wined3d_gl_info *gl_info;
1834     struct wined3d_context *context;
1835
1836     TRACE("surface %p.\n", surface);
1837
1838     if (resource->pool == WINED3DPOOL_DEFAULT)
1839     {
1840         /* Default pool resources are supposed to be destroyed before Reset is called.
1841          * Implicit resources stay however. So this means we have an implicit render target
1842          * or depth stencil. The content may be destroyed, but we still have to tear down
1843          * opengl resources, so we cannot leave early.
1844          *
1845          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1846          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1847          * or the depth stencil into an FBO the texture or render buffer will be removed
1848          * and all flags get lost
1849          */
1850         surface_init_sysmem(surface);
1851     }
1852     else
1853     {
1854         /* Load the surface into system memory */
1855         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1856         surface_modify_location(surface, surface->draw_binding, FALSE);
1857     }
1858     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1859     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1860     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1861
1862     context = context_acquire(device, NULL);
1863     gl_info = context->gl_info;
1864
1865     /* Destroy PBOs, but load them into real sysmem before */
1866     if (surface->flags & SFLAG_PBO)
1867         surface_remove_pbo(surface, gl_info);
1868
1869     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1870      * all application-created targets the application has to release the surface
1871      * before calling _Reset
1872      */
1873     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1874     {
1875         ENTER_GL();
1876         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1877         LEAVE_GL();
1878         list_remove(&entry->entry);
1879         HeapFree(GetProcessHeap(), 0, entry);
1880     }
1881     list_init(&surface->renderbuffers);
1882     surface->current_renderbuffer = NULL;
1883
1884     ENTER_GL();
1885
1886     /* If we're in a texture, the texture name belongs to the texture.
1887      * Otherwise, destroy it. */
1888     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1889     {
1890         glDeleteTextures(1, &surface->texture_name);
1891         surface->texture_name = 0;
1892         glDeleteTextures(1, &surface->texture_name_srgb);
1893         surface->texture_name_srgb = 0;
1894     }
1895     if (surface->rb_multisample)
1896     {
1897         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1898         surface->rb_multisample = 0;
1899     }
1900     if (surface->rb_resolved)
1901     {
1902         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1903         surface->rb_resolved = 0;
1904     }
1905
1906     LEAVE_GL();
1907
1908     context_release(context);
1909
1910     resource_unload(resource);
1911 }
1912
/* Resource operations for surfaces. The initializer is positional; its only
 * entry is the unload handler, surface_unload(). */
static const struct wined3d_resource_ops surface_resource_ops =
{
    surface_unload,
};
1917
/* Surface operations table built from the surface_*() implementations
 * (presumably the OpenGL-backed surface type; gdi_surface_ops is the GDI
 * counterpart). The initializer is positional, so the entries must stay in
 * the member order of struct wined3d_surface_ops, which is declared outside
 * this file section. */
static const struct wined3d_surface_ops surface_ops =
{
    surface_private_setup,
    surface_cleanup,
    surface_realize_palette,
    surface_draw_overlay,
    surface_preload,
    surface_map,
    surface_unmap,
    surface_getdc,
};
1929
1930 /*****************************************************************************
1931  * Initializes the GDI surface, aka creates the DIB section we render to
1932  * The DIB section creation is done by calling GetDC, which will create the
1933  * section and releasing the dc to allow the app to use it. The dib section
1934  * will stay until the surface is released
1935  *
1936  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1937  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1938  * avoid confusion in the shared surface code.
1939  *
1940  * Returns:
1941  *  WINED3D_OK on success
1942  *  The return values of called methods on failure
1943  *
1944  *****************************************************************************/
1945 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1946 {
1947     HRESULT hr;
1948
1949     TRACE("surface %p.\n", surface);
1950
1951     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1952     {
1953         ERR("Overlays not yet supported by GDI surfaces.\n");
1954         return WINED3DERR_INVALIDCALL;
1955     }
1956
1957     /* Sysmem textures have memory already allocated - release it,
1958      * this avoids an unnecessary memcpy. */
1959     hr = surface_create_dib_section(surface);
1960     if (SUCCEEDED(hr))
1961     {
1962         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1963         surface->resource.heapMemory = NULL;
1964         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1965     }
1966
1967     /* We don't mind the nonpow2 stuff in GDI. */
1968     surface->pow2Width = surface->resource.width;
1969     surface->pow2Height = surface->resource.height;
1970
1971     return WINED3D_OK;
1972 }
1973
1974 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1975 {
1976     TRACE("surface %p.\n", surface);
1977
1978     if (surface->flags & SFLAG_DIBSECTION)
1979     {
1980         /* Release the DC. */
1981         SelectObject(surface->hDC, surface->dib.holdbitmap);
1982         DeleteDC(surface->hDC);
1983         /* Release the DIB section. */
1984         DeleteObject(surface->dib.DIBsection);
1985         surface->dib.bitmap_data = NULL;
1986         surface->resource.allocatedMemory = NULL;
1987     }
1988
1989     if (surface->flags & SFLAG_USERPTR)
1990         wined3d_surface_set_mem(surface, NULL);
1991     if (surface->overlay_dest)
1992         list_remove(&surface->overlay_entry);
1993
1994     HeapFree(GetProcessHeap(), 0, surface->palette9);
1995
1996     resource_cleanup(&surface->resource);
1997 }
1998
1999 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
2000 {
2001     struct wined3d_palette *palette = surface->palette;
2002
2003     TRACE("surface %p.\n", surface);
2004
2005     if (!palette) return;
2006
2007     if (surface->flags & SFLAG_DIBSECTION)
2008     {
2009         RGBQUAD col[256];
2010         unsigned int i;
2011
2012         TRACE("Updating the DC's palette.\n");
2013
2014         for (i = 0; i < 256; ++i)
2015         {
2016             col[i].rgbRed = palette->palents[i].peRed;
2017             col[i].rgbGreen = palette->palents[i].peGreen;
2018             col[i].rgbBlue = palette->palents[i].peBlue;
2019             col[i].rgbReserved = 0;
2020         }
2021         SetDIBColorTable(surface->hDC, 0, 256, col);
2022     }
2023
2024     /* Update the image because of the palette change. Some games like e.g.
2025      * Red Alert call SetEntries a lot to implement fading. */
2026     /* Tell the swapchain to update the screen. */
2027     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2028     {
2029         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2030         if (surface == swapchain->front_buffer)
2031         {
2032             x11_copy_to_screen(swapchain, NULL);
2033         }
2034     }
2035 }
2036
2037 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
2038 {
2039     FIXME("GDI surfaces can't draw overlays yet.\n");
2040     return E_FAIL;
2041 }
2042
/* Preloading is not supported for GDI surfaces; this handler only logs the
 * call and the error, and leaves the surface untouched. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);
    ERR("Preloading GDI surfaces is not supported.\n");
}
2049
2050 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2051 {
2052     TRACE("surface %p, rect %s, flags %#x.\n",
2053             surface, wine_dbgstr_rect(rect), flags);
2054
2055     if (!surface->resource.allocatedMemory)
2056     {
2057         /* This happens on gdi surfaces if the application set a user pointer
2058          * and resets it. Recreate the DIB section. */
2059         surface_create_dib_section(surface);
2060         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2061     }
2062 }
2063
2064 static void gdi_surface_unmap(struct wined3d_surface *surface)
2065 {
2066     TRACE("surface %p.\n", surface);
2067
2068     /* Tell the swapchain to update the screen. */
2069     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2070     {
2071         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2072         if (surface == swapchain->front_buffer)
2073         {
2074             x11_copy_to_screen(swapchain, &surface->lockedRect);
2075         }
2076     }
2077
2078     memset(&surface->lockedRect, 0, sizeof(RECT));
2079 }
2080
2081 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
2082 {
2083     WINED3DLOCKED_RECT lock;
2084     HRESULT hr;
2085
2086     TRACE("surface %p.\n", surface);
2087
2088     /* Should have a DIB section already. */
2089     if (!(surface->flags & SFLAG_DIBSECTION))
2090     {
2091         WARN("DC not supported on this surface\n");
2092         return WINED3DERR_INVALIDCALL;
2093     }
2094
2095     /* Map the surface. */
2096     hr = wined3d_surface_map(surface, &lock, NULL, 0);
2097     if (FAILED(hr))
2098         ERR("Map failed, hr %#x.\n", hr);
2099
2100     return hr;
2101 }
2102
/* Surface operations table for GDI surfaces. The initializer is positional,
 * so the entries must stay in the member order of struct wined3d_surface_ops
 * (declared outside this file section), i.e. in the same order as the
 * entries of surface_ops. */
static const struct wined3d_surface_ops gdi_surface_ops =
{
    gdi_surface_private_setup,
    surface_gdi_cleanup,
    gdi_surface_realize_palette,
    gdi_surface_draw_overlay,
    gdi_surface_preload,
    gdi_surface_map,
    gdi_surface_unmap,
    gdi_surface_getdc,
};
2114
2115 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2116 {
2117     GLuint *name;
2118     DWORD flag;
2119
2120     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2121
2122     if(srgb)
2123     {
2124         name = &surface->texture_name_srgb;
2125         flag = SFLAG_INSRGBTEX;
2126     }
2127     else
2128     {
2129         name = &surface->texture_name;
2130         flag = SFLAG_INTEXTURE;
2131     }
2132
2133     if (!*name && new_name)
2134     {
2135         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2136          * surface has no texture name yet. See if we can get rid of this. */
2137         if (surface->flags & flag)
2138             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2139         surface_modify_location(surface, flag, FALSE);
2140     }
2141
2142     *name = new_name;
2143     surface_force_reload(surface);
2144 }
2145
2146 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2147 {
2148     TRACE("surface %p, target %#x.\n", surface, target);
2149
2150     if (surface->texture_target != target)
2151     {
2152         if (target == GL_TEXTURE_RECTANGLE_ARB)
2153         {
2154             surface->flags &= ~SFLAG_NORMCOORD;
2155         }
2156         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2157         {
2158             surface->flags |= SFLAG_NORMCOORD;
2159         }
2160     }
2161     surface->texture_target = target;
2162     surface_force_reload(surface);
2163 }
2164
2165 /* Context activation is done by the caller. */
2166 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2167 {
2168     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2169
2170     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2171     {
2172         struct wined3d_texture *texture = surface->container.u.texture;
2173
2174         TRACE("Passing to container (%p).\n", texture);
2175         texture->texture_ops->texture_bind(texture, context, srgb);
2176     }
2177     else
2178     {
2179         if (surface->texture_level)
2180         {
2181             ERR("Standalone surface %p is non-zero texture level %u.\n",
2182                     surface, surface->texture_level);
2183         }
2184
2185         if (srgb)
2186             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2187
2188         ENTER_GL();
2189
2190         if (!surface->texture_name)
2191         {
2192             glGenTextures(1, &surface->texture_name);
2193             checkGLcall("glGenTextures");
2194
2195             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2196
2197             context_bind_texture(context, surface->texture_target, surface->texture_name);
2198             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2199             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2200             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2201             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2202             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2203             checkGLcall("glTexParameteri");
2204         }
2205         else
2206         {
2207             context_bind_texture(context, surface->texture_target, surface->texture_name);
2208         }
2209
2210         LEAVE_GL();
2211     }
2212 }
2213
2214 /* This call just downloads data, the caller is responsible for binding the
2215  * correct texture. */
2216 /* Context activation is done by the caller. */
2217 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2218 {
2219     const struct wined3d_format *format = surface->resource.format;
2220
2221     /* Only support read back of converted P8 surfaces. */
2222     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2223     {
2224         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2225         return;
2226     }
2227
2228     ENTER_GL();
2229
2230     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2231     {
2232         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2233                 surface, surface->texture_level, format->glFormat, format->glType,
2234                 surface->resource.allocatedMemory);
2235
2236         if (surface->flags & SFLAG_PBO)
2237         {
2238             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2239             checkGLcall("glBindBufferARB");
2240             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2241             checkGLcall("glGetCompressedTexImageARB");
2242             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2243             checkGLcall("glBindBufferARB");
2244         }
2245         else
2246         {
2247             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2248                     surface->texture_level, surface->resource.allocatedMemory));
2249             checkGLcall("glGetCompressedTexImageARB");
2250         }
2251
2252         LEAVE_GL();
2253     }
2254     else
2255     {
2256         void *mem;
2257         GLenum gl_format = format->glFormat;
2258         GLenum gl_type = format->glType;
2259         int src_pitch = 0;
2260         int dst_pitch = 0;
2261
2262         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2263         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2264         {
2265             gl_format = GL_ALPHA;
2266             gl_type = GL_UNSIGNED_BYTE;
2267         }
2268
2269         if (surface->flags & SFLAG_NONPOW2)
2270         {
2271             unsigned char alignment = surface->resource.device->surface_alignment;
2272             src_pitch = format->byte_count * surface->pow2Width;
2273             dst_pitch = wined3d_surface_get_pitch(surface);
2274             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2275             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2276         }
2277         else
2278         {
2279             mem = surface->resource.allocatedMemory;
2280         }
2281
2282         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2283                 surface, surface->texture_level, gl_format, gl_type, mem);
2284
2285         if (surface->flags & SFLAG_PBO)
2286         {
2287             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2288             checkGLcall("glBindBufferARB");
2289
2290             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2291             checkGLcall("glGetTexImage");
2292
2293             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2294             checkGLcall("glBindBufferARB");
2295         }
2296         else
2297         {
2298             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2299             checkGLcall("glGetTexImage");
2300         }
2301         LEAVE_GL();
2302
2303         if (surface->flags & SFLAG_NONPOW2)
2304         {
2305             const BYTE *src_data;
2306             BYTE *dst_data;
2307             UINT y;
2308             /*
2309              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2310              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2311              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2312              *
2313              * We're doing this...
2314              *
2315              * instead of boxing the texture :
2316              * |<-texture width ->|  -->pow2width|   /\
2317              * |111111111111111111|              |   |
2318              * |222 Texture 222222| boxed empty  | texture height
2319              * |3333 Data 33333333|              |   |
2320              * |444444444444444444|              |   \/
2321              * -----------------------------------   |
2322              * |     boxed  empty | boxed empty  | pow2height
2323              * |                  |              |   \/
2324              * -----------------------------------
2325              *
2326              *
2327              * we're repacking the data to the expected texture width
2328              *
2329              * |<-texture width ->|  -->pow2width|   /\
2330              * |111111111111111111222222222222222|   |
2331              * |222333333333333333333444444444444| texture height
2332              * |444444                           |   |
2333              * |                                 |   \/
2334              * |                                 |   |
2335              * |            empty                | pow2height
2336              * |                                 |   \/
2337              * -----------------------------------
2338              *
2339              * == is the same as
2340              *
2341              * |<-texture width ->|    /\
2342              * |111111111111111111|
2343              * |222222222222222222|texture height
2344              * |333333333333333333|
2345              * |444444444444444444|    \/
2346              * --------------------
2347              *
2348              * this also means that any references to allocatedMemory should work with the data as if were a
2349              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2350              *
2351              * internally the texture is still stored in a boxed format so any references to textureName will
2352              * get a boxed texture with width pow2width and not a texture of width resource.width.
2353              *
2354              * Performance should not be an issue, because applications normally do not lock the surfaces when
2355              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2356              * and doesn't have to be re-read. */
2357             src_data = mem;
2358             dst_data = surface->resource.allocatedMemory;
2359             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2360             for (y = 1; y < surface->resource.height; ++y)
2361             {
2362                 /* skip the first row */
2363                 src_data += src_pitch;
2364                 dst_data += dst_pitch;
2365                 memcpy(dst_data, src_data, dst_pitch);
2366             }
2367
2368             HeapFree(GetProcessHeap(), 0, mem);
2369         }
2370     }
2371
2372     /* Surface has now been downloaded */
2373     surface->flags |= SFLAG_INSYSMEM;
2374 }
2375
2376 /* This call just uploads data, the caller is responsible for binding the
2377  * correct texture. */
2378 /* Context activation is done by the caller. */
2379 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2380         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2381         BOOL srgb, const struct wined3d_bo_address *data)
2382 {
2383     UINT update_w = src_rect->right - src_rect->left;
2384     UINT update_h = src_rect->bottom - src_rect->top;
2385
2386     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2387             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2388             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2389
2390     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2391         update_h *= format->heightscale;
2392
2393     ENTER_GL();
2394
2395     if (data->buffer_object)
2396     {
2397         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2398         checkGLcall("glBindBufferARB");
2399     }
2400
2401     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2402     {
2403         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2404         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2405         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2406         const BYTE *addr = data->addr;
2407         GLenum internal;
2408
2409         addr += (src_rect->top / format->block_height) * src_pitch;
2410         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2411
2412         if (srgb)
2413             internal = format->glGammaInternal;
2414         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2415             internal = format->rtInternal;
2416         else
2417             internal = format->glInternal;
2418
2419         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2420                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2421                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2422
2423         if (row_length == src_pitch)
2424         {
2425             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2426                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2427         }
2428         else
2429         {
2430             UINT row, y;
2431
2432             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2433              * can't use the unpack row length like below. */
2434             for (row = 0, y = dst_point->y; row < row_count; ++row)
2435             {
2436                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2437                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2438                 y += format->block_height;
2439                 addr += src_pitch;
2440             }
2441         }
2442         checkGLcall("glCompressedTexSubImage2DARB");
2443     }
2444     else
2445     {
2446         const BYTE *addr = data->addr;
2447
2448         addr += src_rect->top * src_w * format->byte_count;
2449         addr += src_rect->left * format->byte_count;
2450
2451         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2452                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2453                 update_w, update_h, format->glFormat, format->glType, addr);
2454
2455         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2456         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2457                 update_w, update_h, format->glFormat, format->glType, addr);
2458         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2459         checkGLcall("glTexSubImage2D");
2460     }
2461
2462     if (data->buffer_object)
2463     {
2464         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2465         checkGLcall("glBindBufferARB");
2466     }
2467
2468     LEAVE_GL();
2469
2470     if (wined3d_settings.strict_draw_ordering)
2471         wglFlush();
2472
2473     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2474     {
2475         struct wined3d_device *device = surface->resource.device;
2476         unsigned int i;
2477
2478         for (i = 0; i < device->context_count; ++i)
2479         {
2480             context_surface_update(device->contexts[i], surface);
2481         }
2482     }
2483 }
2484
2485 /* This call just allocates the texture, the caller is responsible for binding
2486  * the correct texture. */
2487 /* Context activation is done by the caller. */
2488 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2489         const struct wined3d_format *format, BOOL srgb)
2490 {
2491     BOOL enable_client_storage = FALSE;
2492     GLsizei width = surface->pow2Width;
2493     GLsizei height = surface->pow2Height;
2494     const BYTE *mem = NULL;
2495     GLenum internal;
2496
2497     if (srgb)
2498     {
2499         internal = format->glGammaInternal;
2500     }
2501     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2502     {
2503         internal = format->rtInternal;
2504     }
2505     else
2506     {
2507         internal = format->glInternal;
2508     }
2509
2510     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2511
2512     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2513             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2514             internal, width, height, format->glFormat, format->glType);
2515
2516     ENTER_GL();
2517
2518     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2519     {
2520         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2521                 || !surface->resource.allocatedMemory)
2522         {
2523             /* In some cases we want to disable client storage.
2524              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2525              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2526              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2527              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2528              */
2529             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2530             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2531             surface->flags &= ~SFLAG_CLIENT;
2532             enable_client_storage = TRUE;
2533         }
2534         else
2535         {
2536             surface->flags |= SFLAG_CLIENT;
2537
2538             /* Point OpenGL to our allocated texture memory. Do not use
2539              * resource.allocatedMemory here because it might point into a
2540              * PBO. Instead use heapMemory, but get the alignment right. */
2541             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2542                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2543         }
2544     }
2545
2546     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2547     {
2548         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2549                 internal, width, height, 0, surface->resource.size, mem));
2550         checkGLcall("glCompressedTexImage2DARB");
2551     }
2552     else
2553     {
2554         glTexImage2D(surface->texture_target, surface->texture_level,
2555                 internal, width, height, 0, format->glFormat, format->glType, mem);
2556         checkGLcall("glTexImage2D");
2557     }
2558
2559     if(enable_client_storage) {
2560         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2561         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2562     }
2563     LEAVE_GL();
2564 }
2565
2566 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2567  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2568 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2569 /* GL locking is done by the caller */
2570 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2571 {
2572     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2573     struct wined3d_renderbuffer_entry *entry;
2574     GLuint renderbuffer = 0;
2575     unsigned int src_width, src_height;
2576     unsigned int width, height;
2577
2578     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2579     {
2580         width = rt->pow2Width;
2581         height = rt->pow2Height;
2582     }
2583     else
2584     {
2585         width = surface->pow2Width;
2586         height = surface->pow2Height;
2587     }
2588
2589     src_width = surface->pow2Width;
2590     src_height = surface->pow2Height;
2591
2592     /* A depth stencil smaller than the render target is not valid */
2593     if (width > src_width || height > src_height) return;
2594
2595     /* Remove any renderbuffer set if the sizes match */
2596     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2597             || (width == src_width && height == src_height))
2598     {
2599         surface->current_renderbuffer = NULL;
2600         return;
2601     }
2602
2603     /* Look if we've already got a renderbuffer of the correct dimensions */
2604     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2605     {
2606         if (entry->width == width && entry->height == height)
2607         {
2608             renderbuffer = entry->id;
2609             surface->current_renderbuffer = entry;
2610             break;
2611         }
2612     }
2613
2614     if (!renderbuffer)
2615     {
2616         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2617         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2618         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2619                 surface->resource.format->glInternal, width, height);
2620
2621         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2622         entry->width = width;
2623         entry->height = height;
2624         entry->id = renderbuffer;
2625         list_add_head(&surface->renderbuffers, &entry->entry);
2626
2627         surface->current_renderbuffer = entry;
2628     }
2629
2630     checkGLcall("set_compatible_renderbuffer");
2631 }
2632
2633 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2634 {
2635     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2636
2637     TRACE("surface %p.\n", surface);
2638
2639     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2640     {
2641         ERR("Surface %p is not on a swapchain.\n", surface);
2642         return GL_NONE;
2643     }
2644
2645     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2646     {
2647         if (swapchain->render_to_fbo)
2648         {
2649             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2650             return GL_COLOR_ATTACHMENT0;
2651         }
2652         TRACE("Returning GL_BACK\n");
2653         return GL_BACK;
2654     }
2655     else if (surface == swapchain->front_buffer)
2656     {
2657         TRACE("Returning GL_FRONT\n");
2658         return GL_FRONT;
2659     }
2660
2661     FIXME("Higher back buffer, returning GL_BACK\n");
2662     return GL_BACK;
2663 }
2664
2665 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2666 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2667 {
2668     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2669
2670     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2671         /* No partial locking for textures yet. */
2672         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2673
2674     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2675     if (dirty_rect)
2676     {
2677         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2678         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2679         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2680         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2681     }
2682     else
2683     {
2684         surface->dirtyRect.left = 0;
2685         surface->dirtyRect.top = 0;
2686         surface->dirtyRect.right = surface->resource.width;
2687         surface->dirtyRect.bottom = surface->resource.height;
2688     }
2689
2690     /* if the container is a texture then mark it dirty. */
2691     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2692     {
2693         TRACE("Passing to container.\n");
2694         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2695     }
2696 }
2697
2698 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2699 {
2700     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2701     BOOL ck_changed;
2702
2703     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2704
2705     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2706     {
2707         ERR("Not supported on scratch surfaces.\n");
2708         return WINED3DERR_INVALIDCALL;
2709     }
2710
2711     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2712
2713     /* Reload if either the texture and sysmem have different ideas about the
2714      * color key, or the actual key values changed. */
2715     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2716             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2717             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2718     {
2719         TRACE("Reloading because of color keying\n");
2720         /* To perform the color key conversion we need a sysmem copy of
2721          * the surface. Make sure we have it. */
2722
2723         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2724         /* Make sure the texture is reloaded because of the color key change,
2725          * this kills performance though :( */
2726         /* TODO: This is not necessarily needed with hw palettized texture support. */
2727         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2728         /* Switching color keying on / off may change the internal format. */
2729         if (ck_changed)
2730             surface_force_reload(surface);
2731     }
2732     else if (!(surface->flags & flag))
2733     {
2734         TRACE("Reloading because surface is dirty.\n");
2735     }
2736     else
2737     {
2738         TRACE("surface is already in texture\n");
2739         return WINED3D_OK;
2740     }
2741
2742     /* No partial locking for textures yet. */
2743     surface_load_location(surface, flag, NULL);
2744     surface_evict_sysmem(surface);
2745
2746     return WINED3D_OK;
2747 }
2748
/* Convert a 32-bit float to the bit pattern of a 16-bit float (1 sign bit,
 * 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) yields 0x0000, NaN yields 0x7c01, +/-INF yield
 * 0x7c00 / 0xfc00. Values too large for a 16-bit float become INF, values
 * too small become denormals or zero. The mantissa is rounded to nearest,
 * with ties rounded away from zero; denormals are truncated.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), so that its integer part is the
     * 11 bit mantissa including the implicit leading one. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    if (mantissa == 2048)
    {
        /* Rounding carried out of the mantissa, e.g. for 1.9999f. Renormalize,
         * otherwise masking the mantissa below would halve the result. */
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2811
2812 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2813 {
2814     ULONG refcount;
2815
2816     TRACE("Surface %p, container %p of type %#x.\n",
2817             surface, surface->container.u.base, surface->container.type);
2818
2819     switch (surface->container.type)
2820     {
2821         case WINED3D_CONTAINER_TEXTURE:
2822             return wined3d_texture_incref(surface->container.u.texture);
2823
2824         case WINED3D_CONTAINER_SWAPCHAIN:
2825             return wined3d_swapchain_incref(surface->container.u.swapchain);
2826
2827         default:
2828             ERR("Unhandled container type %#x.\n", surface->container.type);
2829         case WINED3D_CONTAINER_NONE:
2830             break;
2831     }
2832
2833     refcount = InterlockedIncrement(&surface->resource.ref);
2834     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2835
2836     return refcount;
2837 }
2838
2839 /* Do not call while under the GL lock. */
2840 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2841 {
2842     ULONG refcount;
2843
2844     TRACE("Surface %p, container %p of type %#x.\n",
2845             surface, surface->container.u.base, surface->container.type);
2846
2847     switch (surface->container.type)
2848     {
2849         case WINED3D_CONTAINER_TEXTURE:
2850             return wined3d_texture_decref(surface->container.u.texture);
2851
2852         case WINED3D_CONTAINER_SWAPCHAIN:
2853             return wined3d_swapchain_decref(surface->container.u.swapchain);
2854
2855         default:
2856             ERR("Unhandled container type %#x.\n", surface->container.type);
2857         case WINED3D_CONTAINER_NONE:
2858             break;
2859     }
2860
2861     refcount = InterlockedDecrement(&surface->resource.ref);
2862     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2863
2864     if (!refcount)
2865     {
2866         surface->surface_ops->surface_cleanup(surface);
2867         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2868
2869         TRACE("Destroyed surface %p.\n", surface);
2870         HeapFree(GetProcessHeap(), 0, surface);
2871     }
2872
2873     return refcount;
2874 }
2875
2876 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2877 {
2878     return resource_set_priority(&surface->resource, priority);
2879 }
2880
2881 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2882 {
2883     return resource_get_priority(&surface->resource);
2884 }
2885
2886 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2887 {
2888     TRACE("surface %p.\n", surface);
2889
2890     surface->surface_ops->surface_preload(surface);
2891 }
2892
2893 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2894 {
2895     TRACE("surface %p.\n", surface);
2896
2897     return surface->resource.parent;
2898 }
2899
2900 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2901 {
2902     TRACE("surface %p.\n", surface);
2903
2904     return &surface->resource;
2905 }
2906
2907 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2908 {
2909     TRACE("surface %p, flags %#x.\n", surface, flags);
2910
2911     switch (flags)
2912     {
2913         case WINEDDGBS_CANBLT:
2914         case WINEDDGBS_ISBLTDONE:
2915             return WINED3D_OK;
2916
2917         default:
2918             return WINED3DERR_INVALIDCALL;
2919     }
2920 }
2921
2922 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2923 {
2924     TRACE("surface %p, flags %#x.\n", surface, flags);
2925
2926     /* XXX: DDERR_INVALIDSURFACETYPE */
2927
2928     switch (flags)
2929     {
2930         case WINEDDGFS_CANFLIP:
2931         case WINEDDGFS_ISFLIPDONE:
2932             return WINED3D_OK;
2933
2934         default:
2935             return WINED3DERR_INVALIDCALL;
2936     }
2937 }
2938
2939 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2940 {
2941     TRACE("surface %p.\n", surface);
2942
2943     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2944     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2945 }
2946
2947 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2948 {
2949     TRACE("surface %p.\n", surface);
2950
2951     /* So far we don't lose anything :) */
2952     surface->flags &= ~SFLAG_LOST;
2953     return WINED3D_OK;
2954 }
2955
2956 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2957 {
2958     TRACE("surface %p, palette %p.\n", surface, palette);
2959
2960     if (surface->palette == palette)
2961     {
2962         TRACE("Nop palette change.\n");
2963         return WINED3D_OK;
2964     }
2965
2966     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2967         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2968
2969     surface->palette = palette;
2970
2971     if (palette)
2972     {
2973         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2974             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2975
2976         surface->surface_ops->surface_realize_palette(surface);
2977     }
2978
2979     return WINED3D_OK;
2980 }
2981
2982 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2983         DWORD flags, const WINEDDCOLORKEY *color_key)
2984 {
2985     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2986
2987     if (flags & WINEDDCKEY_COLORSPACE)
2988     {
2989         FIXME(" colorkey value not supported (%08x) !\n", flags);
2990         return WINED3DERR_INVALIDCALL;
2991     }
2992
2993     /* Dirtify the surface, but only if a key was changed. */
2994     if (color_key)
2995     {
2996         switch (flags & ~WINEDDCKEY_COLORSPACE)
2997         {
2998             case WINEDDCKEY_DESTBLT:
2999                 surface->DestBltCKey = *color_key;
3000                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3001                 break;
3002
3003             case WINEDDCKEY_DESTOVERLAY:
3004                 surface->DestOverlayCKey = *color_key;
3005                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3006                 break;
3007
3008             case WINEDDCKEY_SRCOVERLAY:
3009                 surface->SrcOverlayCKey = *color_key;
3010                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3011                 break;
3012
3013             case WINEDDCKEY_SRCBLT:
3014                 surface->SrcBltCKey = *color_key;
3015                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3016                 break;
3017         }
3018     }
3019     else
3020     {
3021         switch (flags & ~WINEDDCKEY_COLORSPACE)
3022         {
3023             case WINEDDCKEY_DESTBLT:
3024                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3025                 break;
3026
3027             case WINEDDCKEY_DESTOVERLAY:
3028                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3029                 break;
3030
3031             case WINEDDCKEY_SRCOVERLAY:
3032                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3033                 break;
3034
3035             case WINEDDCKEY_SRCBLT:
3036                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3037                 break;
3038         }
3039     }
3040
3041     return WINED3D_OK;
3042 }
3043
3044 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3045 {
3046     TRACE("surface %p.\n", surface);
3047
3048     return surface->palette;
3049 }
3050
3051 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3052 {
3053     const struct wined3d_format *format = surface->resource.format;
3054     DWORD pitch;
3055
3056     TRACE("surface %p.\n", surface);
3057
3058     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3059     {
3060         /* Since compressed formats are block based, pitch means the amount of
3061          * bytes to the next row of block rather than the next row of pixels. */
3062         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3063         pitch = row_block_count * format->block_byte_count;
3064     }
3065     else
3066     {
3067         unsigned char alignment = surface->resource.device->surface_alignment;
3068         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3069         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3070     }
3071
3072     TRACE("Returning %u.\n", pitch);
3073
3074     return pitch;
3075 }
3076
3077 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3078 {
3079     TRACE("surface %p, mem %p.\n", surface, mem);
3080
3081     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3082     {
3083         WARN("Surface is locked or the DC is in use.\n");
3084         return WINED3DERR_INVALIDCALL;
3085     }
3086
3087     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3088     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3089     {
3090         ERR("Not supported on render targets.\n");
3091         return WINED3DERR_INVALIDCALL;
3092     }
3093
3094     if (mem && mem != surface->resource.allocatedMemory)
3095     {
3096         void *release = NULL;
3097
3098         /* Do I have to copy the old surface content? */
3099         if (surface->flags & SFLAG_DIBSECTION)
3100         {
3101             SelectObject(surface->hDC, surface->dib.holdbitmap);
3102             DeleteDC(surface->hDC);
3103             /* Release the DIB section. */
3104             DeleteObject(surface->dib.DIBsection);
3105             surface->dib.bitmap_data = NULL;
3106             surface->resource.allocatedMemory = NULL;
3107             surface->hDC = NULL;
3108             surface->flags &= ~SFLAG_DIBSECTION;
3109         }
3110         else if (!(surface->flags & SFLAG_USERPTR))
3111         {
3112             release = surface->resource.heapMemory;
3113             surface->resource.heapMemory = NULL;
3114         }
3115         surface->resource.allocatedMemory = mem;
3116         surface->flags |= SFLAG_USERPTR;
3117
3118         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3119         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3120
3121         /* For client textures OpenGL has to be notified. */
3122         if (surface->flags & SFLAG_CLIENT)
3123             surface_release_client_storage(surface);
3124
3125         /* Now free the old memory if any. */
3126         HeapFree(GetProcessHeap(), 0, release);
3127     }
3128     else if (surface->flags & SFLAG_USERPTR)
3129     {
3130         /* HeapMemory should be NULL already. */
3131         if (surface->resource.heapMemory)
3132             ERR("User pointer surface has heap memory allocated.\n");
3133
3134         if (!mem)
3135         {
3136             surface->resource.allocatedMemory = NULL;
3137             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3138
3139             if (surface->flags & SFLAG_CLIENT)
3140                 surface_release_client_storage(surface);
3141
3142             surface_prepare_system_memory(surface);
3143         }
3144
3145         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3146     }
3147
3148     return WINED3D_OK;
3149 }
3150
3151 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3152 {
3153     LONG w, h;
3154
3155     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3156
3157     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3158     {
3159         WARN("Not an overlay surface.\n");
3160         return WINEDDERR_NOTAOVERLAYSURFACE;
3161     }
3162
3163     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3164     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3165     surface->overlay_destrect.left = x;
3166     surface->overlay_destrect.top = y;
3167     surface->overlay_destrect.right = x + w;
3168     surface->overlay_destrect.bottom = y + h;
3169
3170     surface->surface_ops->surface_draw_overlay(surface);
3171
3172     return WINED3D_OK;
3173 }
3174
3175 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3176 {
3177     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3178
3179     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3180     {
3181         TRACE("Not an overlay surface.\n");
3182         return WINEDDERR_NOTAOVERLAYSURFACE;
3183     }
3184
3185     if (!surface->overlay_dest)
3186     {
3187         TRACE("Overlay not visible.\n");
3188         *x = 0;
3189         *y = 0;
3190         return WINEDDERR_OVERLAYNOTVISIBLE;
3191     }
3192
3193     *x = surface->overlay_destrect.left;
3194     *y = surface->overlay_destrect.top;
3195
3196     TRACE("Returning position %d, %d.\n", *x, *y);
3197
3198     return WINED3D_OK;
3199 }
3200
3201 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3202         DWORD flags, struct wined3d_surface *ref)
3203 {
3204     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3205
3206     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3207     {
3208         TRACE("Not an overlay surface.\n");
3209         return WINEDDERR_NOTAOVERLAYSURFACE;
3210     }
3211
3212     return WINED3D_OK;
3213 }
3214
3215 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3216         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3217 {
3218     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3219             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3220
3221     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3222     {
3223         WARN("Not an overlay surface.\n");
3224         return WINEDDERR_NOTAOVERLAYSURFACE;
3225     }
3226     else if (!dst_surface)
3227     {
3228         WARN("Dest surface is NULL.\n");
3229         return WINED3DERR_INVALIDCALL;
3230     }
3231
3232     if (src_rect)
3233     {
3234         surface->overlay_srcrect = *src_rect;
3235     }
3236     else
3237     {
3238         surface->overlay_srcrect.left = 0;
3239         surface->overlay_srcrect.top = 0;
3240         surface->overlay_srcrect.right = surface->resource.width;
3241         surface->overlay_srcrect.bottom = surface->resource.height;
3242     }
3243
3244     if (dst_rect)
3245     {
3246         surface->overlay_destrect = *dst_rect;
3247     }
3248     else
3249     {
3250         surface->overlay_destrect.left = 0;
3251         surface->overlay_destrect.top = 0;
3252         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3253         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3254     }
3255
3256     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3257     {
3258         list_remove(&surface->overlay_entry);
3259     }
3260
3261     if (flags & WINEDDOVER_SHOW)
3262     {
3263         if (surface->overlay_dest != dst_surface)
3264         {
3265             surface->overlay_dest = dst_surface;
3266             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3267         }
3268     }
3269     else if (flags & WINEDDOVER_HIDE)
3270     {
3271         /* tests show that the rectangles are erased on hide */
3272         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3273         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3274         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3275         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3276         surface->overlay_dest = NULL;
3277     }
3278
3279     surface->surface_ops->surface_draw_overlay(surface);
3280
3281     return WINED3D_OK;
3282 }
3283
3284 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3285 {
3286     TRACE("surface %p, clipper %p.\n", surface, clipper);
3287
3288     surface->clipper = clipper;
3289
3290     return WINED3D_OK;
3291 }
3292
3293 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3294 {
3295     TRACE("surface %p.\n", surface);
3296
3297     return surface->clipper;
3298 }
3299
3300 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3301 {
3302     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3303
3304     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3305
3306     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3307     {
3308         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3309         return WINED3DERR_INVALIDCALL;
3310     }
3311
3312     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3313             surface->pow2Width, surface->pow2Height);
3314     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3315     surface->resource.format = format;
3316
3317     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3318     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3319             format->glFormat, format->glInternal, format->glType);
3320
3321     return WINED3D_OK;
3322 }
3323
3324 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3325         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3326 {
3327     unsigned short *dst_s;
3328     const float *src_f;
3329     unsigned int x, y;
3330
3331     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3332
3333     for (y = 0; y < h; ++y)
3334     {
3335         src_f = (const float *)(src + y * pitch_in);
3336         dst_s = (unsigned short *) (dst + y * pitch_out);
3337         for (x = 0; x < w; ++x)
3338         {
3339             dst_s[x] = float_32_to_16(src_f + x);
3340         }
3341     }
3342 }
3343
3344 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3345         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3346 {
3347     static const unsigned char convert_5to8[] =
3348     {
3349         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3350         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3351         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3352         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3353     };
3354     static const unsigned char convert_6to8[] =
3355     {
3356         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3357         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3358         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3359         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3360         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3361         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3362         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3363         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3364     };
3365     unsigned int x, y;
3366
3367     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3368
3369     for (y = 0; y < h; ++y)
3370     {
3371         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3372         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3373         for (x = 0; x < w; ++x)
3374         {
3375             WORD pixel = src_line[x];
3376             dst_line[x] = 0xff000000
3377                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3378                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3379                     | convert_5to8[(pixel & 0x001f)];
3380         }
3381     }
3382 }
3383
3384 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3385  * in both cases we're just setting the X / Alpha channel to 0xff. */
3386 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3387         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3388 {
3389     unsigned int x, y;
3390
3391     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3392
3393     for (y = 0; y < h; ++y)
3394     {
3395         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3396         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3397
3398         for (x = 0; x < w; ++x)
3399         {
3400             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3401         }
3402     }
3403 }
3404
3405 static inline BYTE cliptobyte(int x)
3406 {
3407     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3408 }
3409
3410 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3411         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3412 {
3413     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3414     unsigned int x, y;
3415
3416     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3417
3418     for (y = 0; y < h; ++y)
3419     {
3420         const BYTE *src_line = src + y * pitch_in;
3421         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3422         for (x = 0; x < w; ++x)
3423         {
3424             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3425              *     C = Y - 16; D = U - 128; E = V - 128;
3426              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3427              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3428              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3429              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3430              * U and V are shared between the pixels. */
3431             if (!(x & 1)) /* For every even pixel, read new U and V. */
3432             {
3433                 d = (int) src_line[1] - 128;
3434                 e = (int) src_line[3] - 128;
3435                 r2 = 409 * e + 128;
3436                 g2 = - 100 * d - 208 * e + 128;
3437                 b2 = 516 * d + 128;
3438             }
3439             c2 = 298 * ((int) src_line[0] - 16);
3440             dst_line[x] = 0xff000000
3441                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3442                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3443                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3444                 /* Scale RGB values to 0..255 range,
3445                  * then clip them if still not in range (may be negative),
3446                  * then shift them within DWORD if necessary. */
3447             src_line += 2;
3448         }
3449     }
3450 }
3451
3452 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3453         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3454 {
3455     unsigned int x, y;
3456     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3457
3458     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3459
3460     for (y = 0; y < h; ++y)
3461     {
3462         const BYTE *src_line = src + y * pitch_in;
3463         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3464         for (x = 0; x < w; ++x)
3465         {
3466             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3467              *     C = Y - 16; D = U - 128; E = V - 128;
3468              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3469              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3470              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3471              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3472              * U and V are shared between the pixels. */
3473             if (!(x & 1)) /* For every even pixel, read new U and V. */
3474             {
3475                 d = (int) src_line[1] - 128;
3476                 e = (int) src_line[3] - 128;
3477                 r2 = 409 * e + 128;
3478                 g2 = - 100 * d - 208 * e + 128;
3479                 b2 = 516 * d + 128;
3480             }
3481             c2 = 298 * ((int) src_line[0] - 16);
3482             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3483                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3484                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3485                 /* Scale RGB values to 0..255 range,
3486                  * then clip them if still not in range (may be negative),
3487                  * then shift them within DWORD if necessary. */
3488             src_line += 2;
3489         }
3490     }
3491 }
3492
3493 struct d3dfmt_convertor_desc
3494 {
3495     enum wined3d_format_id from, to;
3496     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3497 };
3498
3499 static const struct d3dfmt_convertor_desc convertors[] =
3500 {
3501     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3502     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3503     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3504     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3505     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3506     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3507 };
3508
3509 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3510         enum wined3d_format_id to)
3511 {
3512     unsigned int i;
3513
3514     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3515     {
3516         if (convertors[i].from == from && convertors[i].to == to)
3517             return &convertors[i];
3518     }
3519
3520     return NULL;
3521 }
3522
3523 /*****************************************************************************
3524  * surface_convert_format
3525  *
3526  * Creates a duplicate of a surface in a different format. Is used by Blt to
3527  * blit between surfaces with different formats.
3528  *
3529  * Parameters
3530  *  source: Source surface
3531  *  fmt: Requested destination format
3532  *
3533  *****************************************************************************/
3534 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3535 {
3536     const struct d3dfmt_convertor_desc *conv;
3537     WINED3DLOCKED_RECT lock_src, lock_dst;
3538     struct wined3d_surface *ret = NULL;
3539     HRESULT hr;
3540
3541     conv = find_convertor(source->resource.format->id, to_fmt);
3542     if (!conv)
3543     {
3544         FIXME("Cannot find a conversion function from format %s to %s.\n",
3545                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3546         return NULL;
3547     }
3548
3549     wined3d_surface_create(source->resource.device, source->resource.width,
3550             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3551             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3552             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3553     if (!ret)
3554     {
3555         ERR("Failed to create a destination surface for conversion.\n");
3556         return NULL;
3557     }
3558
3559     memset(&lock_src, 0, sizeof(lock_src));
3560     memset(&lock_dst, 0, sizeof(lock_dst));
3561
3562     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3563     if (FAILED(hr))
3564     {
3565         ERR("Failed to lock the source surface.\n");
3566         wined3d_surface_decref(ret);
3567         return NULL;
3568     }
3569     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3570     if (FAILED(hr))
3571     {
3572         ERR("Failed to lock the destination surface.\n");
3573         wined3d_surface_unmap(source);
3574         wined3d_surface_decref(ret);
3575         return NULL;
3576     }
3577
3578     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3579             source->resource.width, source->resource.height);
3580
3581     wined3d_surface_unmap(ret);
3582     wined3d_surface_unmap(source);
3583
3584     return ret;
3585 }
3586
3587 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3588         unsigned int bpp, UINT pitch, DWORD color)
3589 {
3590     BYTE *first;
3591     int x, y;
3592
3593     /* Do first row */
3594
3595 #define COLORFILL_ROW(type) \
3596 do { \
3597     type *d = (type *)buf; \
3598     for (x = 0; x < width; ++x) \
3599         d[x] = (type)color; \
3600 } while(0)
3601
3602     switch (bpp)
3603     {
3604         case 1:
3605             COLORFILL_ROW(BYTE);
3606             break;
3607
3608         case 2:
3609             COLORFILL_ROW(WORD);
3610             break;
3611
3612         case 3:
3613         {
3614             BYTE *d = buf;
3615             for (x = 0; x < width; ++x, d += 3)
3616             {
3617                 d[0] = (color      ) & 0xFF;
3618                 d[1] = (color >>  8) & 0xFF;
3619                 d[2] = (color >> 16) & 0xFF;
3620             }
3621             break;
3622         }
3623         case 4:
3624             COLORFILL_ROW(DWORD);
3625             break;
3626
3627         default:
3628             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3629             return WINED3DERR_NOTAVAILABLE;
3630     }
3631
3632 #undef COLORFILL_ROW
3633
3634     /* Now copy first row. */
3635     first = buf;
3636     for (y = 1; y < height; ++y)
3637     {
3638         buf += pitch;
3639         memcpy(buf, first, width * bpp);
3640     }
3641
3642     return WINED3D_OK;
3643 }
3644
3645 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3646 {
3647     TRACE("surface %p.\n", surface);
3648
3649     if (!(surface->flags & SFLAG_LOCKED))
3650     {
3651         WARN("Trying to unmap unmapped surface.\n");
3652         return WINEDDERR_NOTLOCKED;
3653     }
3654     surface->flags &= ~SFLAG_LOCKED;
3655
3656     surface->surface_ops->surface_unmap(surface);
3657
3658     return WINED3D_OK;
3659 }
3660
3661 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3662         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3663 {
3664     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3665             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3666
3667     if (surface->flags & SFLAG_LOCKED)
3668     {
3669         WARN("Surface is already mapped.\n");
3670         return WINED3DERR_INVALIDCALL;
3671     }
3672     surface->flags |= SFLAG_LOCKED;
3673
3674     if (!(surface->flags & SFLAG_LOCKABLE))
3675         WARN("Trying to lock unlockable surface.\n");
3676
3677     surface->surface_ops->surface_map(surface, rect, flags);
3678
3679     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3680
3681     if (!rect)
3682     {
3683         locked_rect->pBits = surface->resource.allocatedMemory;
3684         surface->lockedRect.left = 0;
3685         surface->lockedRect.top = 0;
3686         surface->lockedRect.right = surface->resource.width;
3687         surface->lockedRect.bottom = surface->resource.height;
3688     }
3689     else
3690     {
3691         const struct wined3d_format *format = surface->resource.format;
3692
3693         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3694         {
3695             /* Compressed textures are block based, so calculate the offset of
3696              * the block that contains the top-left pixel of the locked rectangle. */
3697             locked_rect->pBits = surface->resource.allocatedMemory
3698                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3699                     + ((rect->left / format->block_width) * format->block_byte_count);
3700         }
3701         else
3702         {
3703             locked_rect->pBits = surface->resource.allocatedMemory
3704                     + (locked_rect->Pitch * rect->top)
3705                     + (rect->left * format->byte_count);
3706         }
3707         surface->lockedRect.left = rect->left;
3708         surface->lockedRect.top = rect->top;
3709         surface->lockedRect.right = rect->right;
3710         surface->lockedRect.bottom = rect->bottom;
3711     }
3712
3713     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3714     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3715
3716     return WINED3D_OK;
3717 }
3718
3719 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3720 {
3721     HRESULT hr;
3722
3723     TRACE("surface %p, dc %p.\n", surface, dc);
3724
3725     if (surface->flags & SFLAG_USERPTR)
3726     {
3727         ERR("Not supported on surfaces with application-provided memory.\n");
3728         return WINEDDERR_NODC;
3729     }
3730
3731     /* Give more detailed info for ddraw. */
3732     if (surface->flags & SFLAG_DCINUSE)
3733         return WINEDDERR_DCALREADYCREATED;
3734
3735     /* Can't GetDC if the surface is locked. */
3736     if (surface->flags & SFLAG_LOCKED)
3737         return WINED3DERR_INVALIDCALL;
3738
3739     hr = surface->surface_ops->surface_getdc(surface);
3740     if (FAILED(hr))
3741         return hr;
3742
3743     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3744             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3745     {
3746         /* GetDC on palettized formats is unsupported in D3D9, and the method
3747          * is missing in D3D8, so this should only be used for DX <=7
3748          * surfaces (with non-device palettes). */
3749         const PALETTEENTRY *pal = NULL;
3750
3751         if (surface->palette)
3752         {
3753             pal = surface->palette->palents;
3754         }
3755         else
3756         {
3757             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3758             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3759
3760             if (dds_primary && dds_primary->palette)
3761                 pal = dds_primary->palette->palents;
3762         }
3763
3764         if (pal)
3765         {
3766             RGBQUAD col[256];
3767             unsigned int i;
3768
3769             for (i = 0; i < 256; ++i)
3770             {
3771                 col[i].rgbRed = pal[i].peRed;
3772                 col[i].rgbGreen = pal[i].peGreen;
3773                 col[i].rgbBlue = pal[i].peBlue;
3774                 col[i].rgbReserved = 0;
3775             }
3776             SetDIBColorTable(surface->hDC, 0, 256, col);
3777         }
3778     }
3779
3780     surface->flags |= SFLAG_DCINUSE;
3781
3782     *dc = surface->hDC;
3783     TRACE("Returning dc %p.\n", *dc);
3784
3785     return WINED3D_OK;
3786 }
3787
3788 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3789 {
3790     TRACE("surface %p, dc %p.\n", surface, dc);
3791
3792     if (!(surface->flags & SFLAG_DCINUSE))
3793         return WINEDDERR_NODC;
3794
3795     if (surface->hDC != dc)
3796     {
3797         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3798                 dc, surface->hDC);
3799         return WINEDDERR_NODC;
3800     }
3801
3802     /* Copy the contents of the DIB over to the PBO. */
3803     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3804         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3805
3806     /* We locked first, so unlock now. */
3807     wined3d_surface_unmap(surface);
3808
3809     surface->flags &= ~SFLAG_DCINUSE;
3810
3811     return WINED3D_OK;
3812 }
3813
3814 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3815 {
3816     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3817
3818     if (flags)
3819     {
3820         static UINT once;
3821         if (!once++)
3822             FIXME("Ignoring flags %#x.\n", flags);
3823         else
3824             WARN("Ignoring flags %#x.\n", flags);
3825     }
3826
3827     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3828     {
3829         ERR("Not supported on swapchain surfaces.\n");
3830         return WINEDDERR_NOTFLIPPABLE;
3831     }
3832
3833     /* Flipping is only supported on render targets and overlays. */
3834     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3835     {
3836         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3837         return WINEDDERR_NOTFLIPPABLE;
3838     }
3839
3840     flip_surface(surface, override);
3841
3842     /* Update overlays if they're visible. */
3843     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3844         return surface->surface_ops->surface_draw_overlay(surface);
3845
3846     return WINED3D_OK;
3847 }
3848
3849 /* Do not call while under the GL lock. */
3850 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3851 {
3852     struct wined3d_device *device = surface->resource.device;
3853
3854     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3855
3856     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3857     {
3858         struct wined3d_texture *texture = surface->container.u.texture;
3859
3860         TRACE("Passing to container (%p).\n", texture);
3861         texture->texture_ops->texture_preload(texture, srgb);
3862     }
3863     else
3864     {
3865         struct wined3d_context *context;
3866
3867         TRACE("(%p) : About to load surface\n", surface);
3868
3869         /* TODO: Use already acquired context when possible. */
3870         context = context_acquire(device, NULL);
3871
3872         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3873
3874         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3875         {
3876             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3877             GLclampf tmp;
3878             tmp = 0.9f;
3879             ENTER_GL();
3880             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3881             LEAVE_GL();
3882         }
3883
3884         context_release(context);
3885     }
3886 }
3887
3888 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3889 {
3890     if (!surface->resource.allocatedMemory)
3891     {
3892         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3893                 surface->resource.size + RESOURCE_ALIGNMENT);
3894         if (!surface->resource.heapMemory)
3895         {
3896             ERR("Out of memory\n");
3897             return FALSE;
3898         }
3899         surface->resource.allocatedMemory =
3900             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3901     }
3902     else
3903     {
3904         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3905     }
3906
3907     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3908
3909     return TRUE;
3910 }
3911
3912 /* Read the framebuffer back into the surface */
3913 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3914 {
3915     struct wined3d_device *device = surface->resource.device;
3916     const struct wined3d_gl_info *gl_info;
3917     struct wined3d_context *context;
3918     BYTE *mem;
3919     GLint fmt;
3920     GLint type;
3921     BYTE *row, *top, *bottom;
3922     int i;
3923     BOOL bpp;
3924     RECT local_rect;
3925     BOOL srcIsUpsideDown;
3926     GLint rowLen = 0;
3927     GLint skipPix = 0;
3928     GLint skipRow = 0;
3929
3930     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3931         static BOOL warned = FALSE;
3932         if(!warned) {
3933             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3934             warned = TRUE;
3935         }
3936         return;
3937     }
3938
3939     context = context_acquire(device, surface);
3940     context_apply_blit_state(context, device);
3941     gl_info = context->gl_info;
3942
3943     ENTER_GL();
3944
3945     /* Select the correct read buffer, and give some debug output.
3946      * There is no need to keep track of the current read buffer or reset it, every part of the code
3947      * that reads sets the read buffer as desired.
3948      */
3949     if (surface_is_offscreen(surface))
3950     {
3951         /* Mapping the primary render target which is not on a swapchain.
3952          * Read from the back buffer. */
3953         TRACE("Mapping offscreen render target.\n");
3954         glReadBuffer(device->offscreenBuffer);
3955         srcIsUpsideDown = TRUE;
3956     }
3957     else
3958     {
3959         /* Onscreen surfaces are always part of a swapchain */
3960         GLenum buffer = surface_get_gl_buffer(surface);
3961         TRACE("Mapping %#x buffer.\n", buffer);
3962         glReadBuffer(buffer);
3963         checkGLcall("glReadBuffer");
3964         srcIsUpsideDown = FALSE;
3965     }
3966
3967     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3968     if (!rect)
3969     {
3970         local_rect.left = 0;
3971         local_rect.top = 0;
3972         local_rect.right = surface->resource.width;
3973         local_rect.bottom = surface->resource.height;
3974     }
3975     else
3976     {
3977         local_rect = *rect;
3978     }
3979     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3980
3981     switch (surface->resource.format->id)
3982     {
3983         case WINED3DFMT_P8_UINT:
3984         {
3985             if (primary_render_target_is_p8(device))
3986             {
3987                 /* In case of P8 render targets the index is stored in the alpha component */
3988                 fmt = GL_ALPHA;
3989                 type = GL_UNSIGNED_BYTE;
3990                 mem = dest;
3991                 bpp = surface->resource.format->byte_count;
3992             }
3993             else
3994             {
3995                 /* GL can't return palettized data, so read ARGB pixels into a
3996                  * separate block of memory and convert them into palettized format
3997                  * in software. Slow, but if the app means to use palettized render
3998                  * targets and locks it...
3999                  *
4000                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4001                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4002                  * for the color channels when palettizing the colors.
4003                  */
4004                 fmt = GL_RGB;
4005                 type = GL_UNSIGNED_BYTE;
4006                 pitch *= 3;
4007                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4008                 if (!mem)
4009                 {
4010                     ERR("Out of memory\n");
4011                     LEAVE_GL();
4012                     return;
4013                 }
4014                 bpp = surface->resource.format->byte_count * 3;
4015             }
4016         }
4017         break;
4018
4019         default:
4020             mem = dest;
4021             fmt = surface->resource.format->glFormat;
4022             type = surface->resource.format->glType;
4023             bpp = surface->resource.format->byte_count;
4024     }
4025
4026     if (surface->flags & SFLAG_PBO)
4027     {
4028         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4029         checkGLcall("glBindBufferARB");
4030         if (mem)
4031         {
4032             ERR("mem not null for pbo -- unexpected\n");
4033             mem = NULL;
4034         }
4035     }
4036
4037     /* Save old pixel store pack state */
4038     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4039     checkGLcall("glGetIntegerv");
4040     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4041     checkGLcall("glGetIntegerv");
4042     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4043     checkGLcall("glGetIntegerv");
4044
4045     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4046     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4047     checkGLcall("glPixelStorei");
4048     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4049     checkGLcall("glPixelStorei");
4050     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4051     checkGLcall("glPixelStorei");
4052
4053     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4054             local_rect.right - local_rect.left,
4055             local_rect.bottom - local_rect.top,
4056             fmt, type, mem);
4057     checkGLcall("glReadPixels");
4058
4059     /* Reset previous pixel store pack state */
4060     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4061     checkGLcall("glPixelStorei");
4062     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4063     checkGLcall("glPixelStorei");
4064     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4065     checkGLcall("glPixelStorei");
4066
4067     if (surface->flags & SFLAG_PBO)
4068     {
4069         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4070         checkGLcall("glBindBufferARB");
4071
4072         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4073          * to get a pointer to it and perform the flipping in software. This is a lot
4074          * faster than calling glReadPixels for each line. In case we want more speed
4075          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4076         if (!srcIsUpsideDown)
4077         {
4078             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4079             checkGLcall("glBindBufferARB");
4080
4081             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4082             checkGLcall("glMapBufferARB");
4083         }
4084     }
4085
4086     /* TODO: Merge this with the palettization loop below for P8 targets */
4087     if(!srcIsUpsideDown) {
4088         UINT len, off;
4089         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4090             Flip the lines in software */
4091         len = (local_rect.right - local_rect.left) * bpp;
4092         off = local_rect.left * bpp;
4093
4094         row = HeapAlloc(GetProcessHeap(), 0, len);
4095         if(!row) {
4096             ERR("Out of memory\n");
4097             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4098                 HeapFree(GetProcessHeap(), 0, mem);
4099             LEAVE_GL();
4100             return;
4101         }
4102
4103         top = mem + pitch * local_rect.top;
4104         bottom = mem + pitch * (local_rect.bottom - 1);
4105         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4106             memcpy(row, top + off, len);
4107             memcpy(top + off, bottom + off, len);
4108             memcpy(bottom + off, row, len);
4109             top += pitch;
4110             bottom -= pitch;
4111         }
4112         HeapFree(GetProcessHeap(), 0, row);
4113
4114         /* Unmap the temp PBO buffer */
4115         if (surface->flags & SFLAG_PBO)
4116         {
4117             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4118             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4119         }
4120     }
4121
4122     LEAVE_GL();
4123     context_release(context);
4124
4125     /* For P8 textures we need to perform an inverse palette lookup. This is
4126      * done by searching for a palette index which matches the RGB value.
4127      * Note this isn't guaranteed to work when there are multiple entries for
4128      * the same color but we have no choice. In case of P8 render targets,
4129      * the index is stored in the alpha component so no conversion is needed. */
4130     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4131     {
4132         const PALETTEENTRY *pal = NULL;
4133         DWORD width = pitch / 3;
4134         int x, y, c;
4135
4136         if (surface->palette)
4137         {
4138             pal = surface->palette->palents;
4139         }
4140         else
4141         {
4142             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4143             HeapFree(GetProcessHeap(), 0, mem);
4144             return;
4145         }
4146
4147         for(y = local_rect.top; y < local_rect.bottom; y++) {
4148             for(x = local_rect.left; x < local_rect.right; x++) {
4149                 /*                      start              lines            pixels      */
4150                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4151                 const BYTE *green = blue  + 1;
4152                 const BYTE *red = green + 1;
4153
4154                 for(c = 0; c < 256; c++) {
4155                     if(*red   == pal[c].peRed   &&
4156                        *green == pal[c].peGreen &&
4157                        *blue  == pal[c].peBlue)
4158                     {
4159                         *((BYTE *) dest + y * width + x) = c;
4160                         break;
4161                     }
4162                 }
4163             }
4164         }
4165         HeapFree(GetProcessHeap(), 0, mem);
4166     }
4167 }
4168
4169 /* Read the framebuffer contents into a texture */
4170 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
4171 {
4172     struct wined3d_device *device = surface->resource.device;
4173     struct wined3d_context *context;
4174
4175     if (!surface_is_offscreen(surface))
4176     {
4177         /* We would need to flip onscreen surfaces, but there's no efficient
4178          * way to do that here. It makes more sense for the caller to
4179          * explicitly go through sysmem. */
4180         ERR("Not supported for onscreen targets.\n");
4181         return;
4182     }
4183
4184     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4185      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4186      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4187      */
4188     context = context_acquire(device, surface);
4189     device_invalidate_state(device, STATE_FRAMEBUFFER);
4190
4191     surface_prepare_texture(surface, context, srgb);
4192     surface_bind_and_dirtify(surface, context, srgb);
4193
4194     TRACE("Reading back offscreen render target %p.\n", surface);
4195
4196     ENTER_GL();
4197
4198     glReadBuffer(device->offscreenBuffer);
4199     checkGLcall("glReadBuffer");
4200
4201     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4202             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4203     checkGLcall("glCopyTexSubImage2D");
4204
4205     LEAVE_GL();
4206
4207     context_release(context);
4208 }
4209
4210 /* Context activation is done by the caller. */
4211 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4212         struct wined3d_context *context, BOOL srgb)
4213 {
4214     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4215     CONVERT_TYPES convert;
4216     struct wined3d_format format;
4217
4218     if (surface->flags & alloc_flag) return;
4219
4220     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4221     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4222     else surface->flags &= ~SFLAG_CONVERTED;
4223
4224     surface_bind_and_dirtify(surface, context, srgb);
4225     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4226     surface->flags |= alloc_flag;
4227 }
4228
4229 /* Context activation is done by the caller. */
4230 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4231 {
4232     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4233     {
4234         struct wined3d_texture *texture = surface->container.u.texture;
4235         UINT sub_count = texture->level_count * texture->layer_count;
4236         UINT i;
4237
4238         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4239
4240         for (i = 0; i < sub_count; ++i)
4241         {
4242             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4243             surface_prepare_texture_internal(s, context, srgb);
4244         }
4245
4246         return;
4247     }
4248
4249     surface_prepare_texture_internal(surface, context, srgb);
4250 }
4251
4252 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4253 {
4254     if (multisample)
4255     {
4256         if (surface->rb_multisample)
4257             return;
4258
4259         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4260         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4261         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4262                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4263         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4264     }
4265     else
4266     {
4267         if (surface->rb_resolved)
4268             return;
4269
4270         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4271         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4272         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4273                 surface->pow2Width, surface->pow2Height);
4274         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4275     }
4276 }
4277
4278 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4279         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4280 {
4281     struct wined3d_device *device = surface->resource.device;
4282     UINT pitch = wined3d_surface_get_pitch(surface);
4283     const struct wined3d_gl_info *gl_info;
4284     struct wined3d_context *context;
4285     RECT local_rect;
4286     UINT w, h;
4287
4288     surface_get_rect(surface, rect, &local_rect);
4289
4290     mem += local_rect.top * pitch + local_rect.left * bpp;
4291     w = local_rect.right - local_rect.left;
4292     h = local_rect.bottom - local_rect.top;
4293
4294     /* Activate the correct context for the render target */
4295     context = context_acquire(device, surface);
4296     context_apply_blit_state(context, device);
4297     gl_info = context->gl_info;
4298
4299     ENTER_GL();
4300
4301     if (!surface_is_offscreen(surface))
4302     {
4303         GLenum buffer = surface_get_gl_buffer(surface);
4304         TRACE("Unlocking %#x buffer.\n", buffer);
4305         context_set_draw_buffer(context, buffer);
4306
4307         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4308         glPixelZoom(1.0f, -1.0f);
4309     }
4310     else
4311     {
4312         /* Primary offscreen render target */
4313         TRACE("Offscreen render target.\n");
4314         context_set_draw_buffer(context, device->offscreenBuffer);
4315
4316         glPixelZoom(1.0f, 1.0f);
4317     }
4318
4319     glRasterPos3i(local_rect.left, local_rect.top, 1);
4320     checkGLcall("glRasterPos3i");
4321
4322     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4323     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4324
4325     if (surface->flags & SFLAG_PBO)
4326     {
4327         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4328         checkGLcall("glBindBufferARB");
4329     }
4330
4331     glDrawPixels(w, h, fmt, type, mem);
4332     checkGLcall("glDrawPixels");
4333
4334     if (surface->flags & SFLAG_PBO)
4335     {
4336         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4337         checkGLcall("glBindBufferARB");
4338     }
4339
4340     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4341     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4342
4343     LEAVE_GL();
4344
4345     if (wined3d_settings.strict_draw_ordering
4346             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4347             && surface->container.u.swapchain->front_buffer == surface))
4348         wglFlush();
4349
4350     context_release(context);
4351 }
4352
4353 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4354         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4355 {
4356     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4357     const struct wined3d_device *device = surface->resource.device;
4358     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4359     BOOL blit_supported = FALSE;
4360
4361     /* Copy the default values from the surface. Below we might perform fixups */
4362     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4363     *format = *surface->resource.format;
4364     *convert = NO_CONVERSION;
4365
4366     /* Ok, now look if we have to do any conversion */
4367     switch (surface->resource.format->id)
4368     {
4369         case WINED3DFMT_P8_UINT:
4370             /* Below the call to blit_supported is disabled for Wine 1.2
4371              * because the function isn't operating correctly yet. At the
4372              * moment 8-bit blits are handled in software and if certain GL
4373              * extensions are around, surface conversion is performed at
4374              * upload time. The blit_supported call recognizes it as a
4375              * destination fixup. This type of upload 'fixup' and 8-bit to
4376              * 8-bit blits need to be handled by the blit_shader.
4377              * TODO: get rid of this #if 0. */
4378 #if 0
4379             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4380                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4381                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4382 #endif
4383             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4384
4385             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4386              * texturing. Further also use conversion in case of color keying.
4387              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4388              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4389              * conflicts with this.
4390              */
4391             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4392                     || colorkey_active || !use_texturing)
4393             {
4394                 format->glFormat = GL_RGBA;
4395                 format->glInternal = GL_RGBA;
4396                 format->glType = GL_UNSIGNED_BYTE;
4397                 format->conv_byte_count = 4;
4398                 if (colorkey_active)
4399                     *convert = CONVERT_PALETTED_CK;
4400                 else
4401                     *convert = CONVERT_PALETTED;
4402             }
4403             break;
4404
4405         case WINED3DFMT_B2G3R3_UNORM:
4406             /* **********************
4407                 GL_UNSIGNED_BYTE_3_3_2
4408                 ********************** */
4409             if (colorkey_active) {
4410                 /* This texture format will never be used.. So do not care about color keying
4411                     up until the point in time it will be needed :-) */
4412                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4413             }
4414             break;
4415
4416         case WINED3DFMT_B5G6R5_UNORM:
4417             if (colorkey_active)
4418             {
4419                 *convert = CONVERT_CK_565;
4420                 format->glFormat = GL_RGBA;
4421                 format->glInternal = GL_RGB5_A1;
4422                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4423                 format->conv_byte_count = 2;
4424             }
4425             break;
4426
4427         case WINED3DFMT_B5G5R5X1_UNORM:
4428             if (colorkey_active)
4429             {
4430                 *convert = CONVERT_CK_5551;
4431                 format->glFormat = GL_BGRA;
4432                 format->glInternal = GL_RGB5_A1;
4433                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4434                 format->conv_byte_count = 2;
4435             }
4436             break;
4437
4438         case WINED3DFMT_B8G8R8_UNORM:
4439             if (colorkey_active)
4440             {
4441                 *convert = CONVERT_CK_RGB24;
4442                 format->glFormat = GL_RGBA;
4443                 format->glInternal = GL_RGBA8;
4444                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4445                 format->conv_byte_count = 4;
4446             }
4447             break;
4448
4449         case WINED3DFMT_B8G8R8X8_UNORM:
4450             if (colorkey_active)
4451             {
4452                 *convert = CONVERT_RGB32_888;
4453                 format->glFormat = GL_RGBA;
4454                 format->glInternal = GL_RGBA8;
4455                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4456                 format->conv_byte_count = 4;
4457             }
4458             break;
4459
4460         default:
4461             break;
4462     }
4463
4464     return WINED3D_OK;
4465 }
4466
4467 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4468 {
4469     const struct wined3d_device *device = surface->resource.device;
4470     const struct wined3d_palette *pal = surface->palette;
4471     BOOL index_in_alpha = FALSE;
4472     unsigned int i;
4473
4474     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4475      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4476      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4477      * duplicate entries. Store the color key in the unused alpha component to speed the
4478      * download up and to make conversion unneeded. */
4479     index_in_alpha = primary_render_target_is_p8(device);
4480
4481     if (!pal)
4482     {
4483         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4484         if (index_in_alpha)
4485         {
4486             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4487              * there's no palette at this time. */
4488             for (i = 0; i < 256; i++) table[i][3] = i;
4489         }
4490     }
4491     else
4492     {
4493         TRACE("Using surface palette %p\n", pal);
4494         /* Get the surface's palette */
4495         for (i = 0; i < 256; ++i)
4496         {
4497             table[i][0] = pal->palents[i].peRed;
4498             table[i][1] = pal->palents[i].peGreen;
4499             table[i][2] = pal->palents[i].peBlue;
4500
4501             /* When index_in_alpha is set the palette index is stored in the
4502              * alpha component. In case of a readback we can then read
4503              * GL_ALPHA. Color keying is handled in BltOverride using a
4504              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4505              * color key itself is passed to glAlphaFunc in other cases the
4506              * alpha component of pixels that should be masked away is set to 0. */
4507             if (index_in_alpha)
4508             {
4509                 table[i][3] = i;
4510             }
4511             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4512                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4513             {
4514                 table[i][3] = 0x00;
4515             }
4516             else if (pal->flags & WINEDDPCAPS_ALPHA)
4517             {
4518                 table[i][3] = pal->palents[i].peFlags;
4519             }
4520             else
4521             {
4522                 table[i][3] = 0xFF;
4523             }
4524         }
4525     }
4526 }
4527
4528 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4529         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4530 {
4531     const BYTE *source;
4532     BYTE *dest;
4533     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4534
4535     switch (convert) {
4536         case NO_CONVERSION:
4537         {
4538             memcpy(dst, src, pitch * height);
4539             break;
4540         }
4541         case CONVERT_PALETTED:
4542         case CONVERT_PALETTED_CK:
4543         {
4544             BYTE table[256][4];
4545             unsigned int x, y;
4546
4547             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4548
4549             for (y = 0; y < height; y++)
4550             {
4551                 source = src + pitch * y;
4552                 dest = dst + outpitch * y;
4553                 /* This is an 1 bpp format, using the width here is fine */
4554                 for (x = 0; x < width; x++) {
4555                     BYTE color = *source++;
4556                     *dest++ = table[color][0];
4557                     *dest++ = table[color][1];
4558                     *dest++ = table[color][2];
4559                     *dest++ = table[color][3];
4560                 }
4561             }
4562         }
4563         break;
4564
4565         case CONVERT_CK_565:
4566         {
4567             /* Converting the 565 format in 5551 packed to emulate color-keying.
4568
4569               Note : in all these conversion, it would be best to average the averaging
4570                       pixels to get the color of the pixel that will be color-keyed to
4571                       prevent 'color bleeding'. This will be done later on if ever it is
4572                       too visible.
4573
4574               Note2: Nvidia documents say that their driver does not support alpha + color keying
4575                      on the same surface and disables color keying in such a case
4576             */
4577             unsigned int x, y;
4578             const WORD *Source;
4579             WORD *Dest;
4580
4581             TRACE("Color keyed 565\n");
4582
4583             for (y = 0; y < height; y++) {
4584                 Source = (const WORD *)(src + y * pitch);
4585                 Dest = (WORD *) (dst + y * outpitch);
4586                 for (x = 0; x < width; x++ ) {
4587                     WORD color = *Source++;
4588                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4589                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4590                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4591                         *Dest |= 0x0001;
4592                     Dest++;
4593                 }
4594             }
4595         }
4596         break;
4597
4598         case CONVERT_CK_5551:
4599         {
4600             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4601             unsigned int x, y;
4602             const WORD *Source;
4603             WORD *Dest;
4604             TRACE("Color keyed 5551\n");
4605             for (y = 0; y < height; y++) {
4606                 Source = (const WORD *)(src + y * pitch);
4607                 Dest = (WORD *) (dst + y * outpitch);
4608                 for (x = 0; x < width; x++ ) {
4609                     WORD color = *Source++;
4610                     *Dest = color;
4611                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4612                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4613                         *Dest |= (1 << 15);
4614                     else
4615                         *Dest &= ~(1 << 15);
4616                     Dest++;
4617                 }
4618             }
4619         }
4620         break;
4621
4622         case CONVERT_CK_RGB24:
4623         {
4624             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4625             unsigned int x, y;
4626             for (y = 0; y < height; y++)
4627             {
4628                 source = src + pitch * y;
4629                 dest = dst + outpitch * y;
4630                 for (x = 0; x < width; x++) {
4631                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4632                     DWORD dstcolor = color << 8;
4633                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4634                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4635                         dstcolor |= 0xff;
4636                     *(DWORD*)dest = dstcolor;
4637                     source += 3;
4638                     dest += 4;
4639                 }
4640             }
4641         }
4642         break;
4643
4644         case CONVERT_RGB32_888:
4645         {
4646             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4647             unsigned int x, y;
4648             for (y = 0; y < height; y++)
4649             {
4650                 source = src + pitch * y;
4651                 dest = dst + outpitch * y;
4652                 for (x = 0; x < width; x++) {
4653                     DWORD color = 0xffffff & *(const DWORD*)source;
4654                     DWORD dstcolor = color << 8;
4655                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4656                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4657                         dstcolor |= 0xff;
4658                     *(DWORD*)dest = dstcolor;
4659                     source += 4;
4660                     dest += 4;
4661                 }
4662             }
4663         }
4664         break;
4665
4666         default:
4667             ERR("Unsupported conversion type %#x.\n", convert);
4668     }
4669     return WINED3D_OK;
4670 }
4671
4672 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4673 {
4674     /* Flip the surface contents */
4675     /* Flip the DC */
4676     {
4677         HDC tmp;
4678         tmp = front->hDC;
4679         front->hDC = back->hDC;
4680         back->hDC = tmp;
4681     }
4682
4683     /* Flip the DIBsection */
4684     {
4685         HBITMAP tmp;
4686         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4687         tmp = front->dib.DIBsection;
4688         front->dib.DIBsection = back->dib.DIBsection;
4689         back->dib.DIBsection = tmp;
4690
4691         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4692         else front->flags &= ~SFLAG_DIBSECTION;
4693         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4694         else back->flags &= ~SFLAG_DIBSECTION;
4695     }
4696
4697     /* Flip the surface data */
4698     {
4699         void* tmp;
4700
4701         tmp = front->dib.bitmap_data;
4702         front->dib.bitmap_data = back->dib.bitmap_data;
4703         back->dib.bitmap_data = tmp;
4704
4705         tmp = front->resource.allocatedMemory;
4706         front->resource.allocatedMemory = back->resource.allocatedMemory;
4707         back->resource.allocatedMemory = tmp;
4708
4709         tmp = front->resource.heapMemory;
4710         front->resource.heapMemory = back->resource.heapMemory;
4711         back->resource.heapMemory = tmp;
4712     }
4713
4714     /* Flip the PBO */
4715     {
4716         GLuint tmp_pbo = front->pbo;
4717         front->pbo = back->pbo;
4718         back->pbo = tmp_pbo;
4719     }
4720
4721     /* client_memory should not be different, but just in case */
4722     {
4723         BOOL tmp;
4724         tmp = front->dib.client_memory;
4725         front->dib.client_memory = back->dib.client_memory;
4726         back->dib.client_memory = tmp;
4727     }
4728
4729     /* Flip the opengl texture */
4730     {
4731         GLuint tmp;
4732
4733         tmp = back->texture_name;
4734         back->texture_name = front->texture_name;
4735         front->texture_name = tmp;
4736
4737         tmp = back->texture_name_srgb;
4738         back->texture_name_srgb = front->texture_name_srgb;
4739         front->texture_name_srgb = tmp;
4740
4741         tmp = back->rb_multisample;
4742         back->rb_multisample = front->rb_multisample;
4743         front->rb_multisample = tmp;
4744
4745         tmp = back->rb_resolved;
4746         back->rb_resolved = front->rb_resolved;
4747         front->rb_resolved = tmp;
4748
4749         resource_unload(&back->resource);
4750         resource_unload(&front->resource);
4751     }
4752
4753     {
4754         DWORD tmp_flags = back->flags;
4755         back->flags = front->flags;
4756         front->flags = tmp_flags;
4757     }
4758 }
4759
4760 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4761  * pixel copy calls. */
4762 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4763         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4764 {
4765     struct wined3d_device *device = dst_surface->resource.device;
4766     float xrel, yrel;
4767     UINT row;
4768     struct wined3d_context *context;
4769     BOOL upsidedown = FALSE;
4770     RECT dst_rect = *dst_rect_in;
4771
4772     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4773      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4774      */
4775     if(dst_rect.top > dst_rect.bottom) {
4776         UINT tmp = dst_rect.bottom;
4777         dst_rect.bottom = dst_rect.top;
4778         dst_rect.top = tmp;
4779         upsidedown = TRUE;
4780     }
4781
4782     context = context_acquire(device, src_surface);
4783     context_apply_blit_state(context, device);
4784     surface_internal_preload(dst_surface, SRGB_RGB);
4785     ENTER_GL();
4786
4787     /* Bind the target texture */
4788     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4789     if (surface_is_offscreen(src_surface))
4790     {
4791         TRACE("Reading from an offscreen target\n");
4792         upsidedown = !upsidedown;
4793         glReadBuffer(device->offscreenBuffer);
4794     }
4795     else
4796     {
4797         glReadBuffer(surface_get_gl_buffer(src_surface));
4798     }
4799     checkGLcall("glReadBuffer");
4800
4801     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4802     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4803
4804     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4805     {
4806         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4807
4808         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4809             ERR("Texture filtering not supported in direct blit\n");
4810         }
4811     }
4812     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4813             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4814     {
4815         ERR("Texture filtering not supported in direct blit\n");
4816     }
4817
4818     if (upsidedown
4819             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4820             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4821     {
4822         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4823
4824         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4825                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4826                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4827                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4828     }
4829     else
4830     {
4831         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4832         /* I have to process this row by row to swap the image,
4833          * otherwise it would be upside down, so stretching in y direction
4834          * doesn't cost extra time
4835          *
4836          * However, stretching in x direction can be avoided if not necessary
4837          */
4838         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4839             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4840             {
4841                 /* Well, that stuff works, but it's very slow.
4842                  * find a better way instead
4843                  */
4844                 UINT col;
4845
4846                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4847                 {
4848                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4849                             dst_rect.left + col /* x offset */, row /* y offset */,
4850                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4851                 }
4852             }
4853             else
4854             {
4855                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4856                         dst_rect.left /* x offset */, row /* y offset */,
4857                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4858             }
4859         }
4860     }
4861     checkGLcall("glCopyTexSubImage2D");
4862
4863     LEAVE_GL();
4864     context_release(context);
4865
4866     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4867      * path is never entered
4868      */
4869     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4870 }
4871
4872 /* Uses the hardware to stretch and flip the image.
      *
      * The contents of the read buffer are copied into a texture, that texture
      * is drawn as a quad of the destination size at the origin of drawBuffer,
      * and the result is copied into the destination texture with
      * glCopyTexSubImage2D(). When the quad had to be drawn into GL_BACK, the
      * back buffer is redrawn from the texture copy afterwards.
      *
      * dst_rect_in with top > bottom requests an upside down copy. Filter
      * selects the min / mag filter set on the texture the read buffer is first
      * copied into. The destination texture is marked as the most up to date
      * location when done. */
4873 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4874         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4875 {
4876     struct wined3d_device *device = dst_surface->resource.device;
4877     struct wined3d_swapchain *src_swapchain = NULL;
4878     GLuint src, backup = 0; /* backup stays 0 unless a temporary texture is generated below */
4879     float left, right, top, bottom; /* Texture coordinates */
4880     UINT fbwidth = src_surface->resource.width;
4881     UINT fbheight = src_surface->resource.height;
4882     struct wined3d_context *context;
4883     GLenum drawBuffer = GL_BACK;
4884     GLenum texture_target;
4885     BOOL noBackBufferBackup;
4886     BOOL src_offscreen;
4887     BOOL upsidedown = FALSE;
4888     RECT dst_rect = *dst_rect_in;
4889
4890     TRACE("Using hwstretch blit\n");
4891     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4892     context = context_acquire(device, src_surface);
4893     context_apply_blit_state(context, device);
4894     surface_internal_preload(dst_surface, SRGB_RGB);
4895
4896     src_offscreen = surface_is_offscreen(src_surface);
         /* With an offscreen source in FBO mode a temporary texture ("backup")
          * is generated below instead of using src_surface's own texture. */
4897     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4898     if (!noBackBufferBackup && !src_surface->texture_name)
4899     {
4900         /* The source surface's texture is bound below, so preload it while
              * it has no texture name yet. */
4901         surface_internal_preload(src_surface, SRGB_RGB);
4902     }
4903     ENTER_GL();
4904
4905     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4906      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4907      */
4908     if (context->aux_buffers >= 2)
4909     {
4910         /* Got more than one aux buffer? Use the 2nd aux buffer */
4911         drawBuffer = GL_AUX1;
4912     }
4913     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4914     {
4915         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4916         drawBuffer = GL_AUX0;
4917     }
4918
4919     if(noBackBufferBackup) {
4920         glGenTextures(1, &backup);
4921         checkGLcall("glGenTextures");
4922         context_bind_texture(context, GL_TEXTURE_2D, backup);
4923         texture_target = GL_TEXTURE_2D;
4924     } else {
4925         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4926          * we are reading from the back buffer, the backup can be used as source texture
4927          */
4928         texture_target = src_surface->texture_target;
4929         context_bind_texture(context, texture_target, src_surface->texture_name);
4930         glEnable(texture_target);
4931         checkGLcall("glEnable(texture_target)");
4932
4933         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4934         src_surface->flags &= ~SFLAG_INTEXTURE;
4935     }
4936
4937     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4938      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4939      */
4940     if(dst_rect.top > dst_rect.bottom) {
4941         UINT tmp = dst_rect.bottom;
4942         dst_rect.bottom = dst_rect.top;
4943         dst_rect.top = tmp;
4944         upsidedown = TRUE;
4945     }
4946
4947     if (src_offscreen)
4948     {
4949         TRACE("Reading from an offscreen target\n");
4950         upsidedown = !upsidedown;
4951         glReadBuffer(device->offscreenBuffer);
4952     }
4953     else
4954     {
4955         glReadBuffer(surface_get_gl_buffer(src_surface));
4956     }
4957
4958     /* TODO: Only back up the part that will be overwritten */
4959     glCopyTexSubImage2D(texture_target, 0,
4960                         0, 0 /* read offsets */,
4961                         0, 0,
4962                         fbwidth,
4963                         fbheight);
4964
4965     checkGLcall("glCopyTexSubImage2D");
4966
4967     /* No issue with overriding these - the sampler is dirty due to blit usage */
4968     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4969             wined3d_gl_mag_filter(magLookup, Filter));
4970     checkGLcall("glTexParameteri");
4971     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
4972             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
4973     checkGLcall("glTexParameteri");
4974
4975     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4976         src_swapchain = src_surface->container.u.swapchain;
4977     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
4978     {
             /* The texture filled by the copy above already holds the source image. */
4979         src = backup ? backup : src_surface->texture_name;
4980     }
4981     else
4982     {
             /* Source is a swapchain surface other than back_buffers[0]: read
              * GL_FRONT into a separate temporary texture. */
4983         glReadBuffer(GL_FRONT);
4984         checkGLcall("glReadBuffer(GL_FRONT)");
4985
4986         glGenTextures(1, &src);
4987         checkGLcall("glGenTextures(1, &src)");
4988         context_bind_texture(context, GL_TEXTURE_2D, src);
4989
4990         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
4991          * out for power of 2 sizes
4992          */
4993         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
4994                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
4995         checkGLcall("glTexImage2D");
4996         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
4997                             0, 0 /* read offsets */,
4998                             0, 0,
4999                             fbwidth,
5000                             fbheight);
5001
5002         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5003         checkGLcall("glTexParameteri");
5004         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5005         checkGLcall("glTexParameteri");
5006
5007         glReadBuffer(GL_BACK);
5008         checkGLcall("glReadBuffer(GL_BACK)");
5009
5010         if(texture_target != GL_TEXTURE_2D) {
5011             glDisable(texture_target);
5012             glEnable(GL_TEXTURE_2D);
5013             texture_target = GL_TEXTURE_2D;
5014         }
5015     }
         /* NOTE(review): no glBegin() / glEnd() pair has been issued yet at this
          * point; the string below is only the label used in the error report. */
5016     checkGLcall("glEnd and previous");
5017
         /* Texture coordinates of the source rectangle. The vertical pair is
          * swapped for an upside down copy. */
5018     left = src_rect->left;
5019     right = src_rect->right;
5020
5021     if (!upsidedown)
5022     {
5023         top = src_surface->resource.height - src_rect->top;
5024         bottom = src_surface->resource.height - src_rect->bottom;
5025     }
5026     else
5027     {
5028         top = src_surface->resource.height - src_rect->bottom;
5029         bottom = src_surface->resource.height - src_rect->top;
5030     }
5031
         /* Normalized coordinates are expressed relative to the pow2 texture size. */
5032     if (src_surface->flags & SFLAG_NORMCOORD)
5033     {
5034         left /= src_surface->pow2Width;
5035         right /= src_surface->pow2Width;
5036         top /= src_surface->pow2Height;
5037         bottom /= src_surface->pow2Height;
5038     }
5039
5040     /* draw the source texture stretched and upside down. The correct surface is bound already */
5041     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5042     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5043
         /* Draw into drawBuffer and read from it as well, so that the
          * glCopyTexSubImage2D() call further down picks up the quad. */
5044     context_set_draw_buffer(context, drawBuffer);
5045     glReadBuffer(drawBuffer);
5046
5047     glBegin(GL_QUADS);
5048         /* bottom left */
5049         glTexCoord2f(left, bottom);
5050         glVertex2i(0, 0);
5051
5052         /* top left */
5053         glTexCoord2f(left, top);
5054         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5055
5056         /* top right */
5057         glTexCoord2f(right, top);
5058         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5059
5060         /* bottom right */
5061         glTexCoord2f(right, bottom);
5062         glVertex2i(dst_rect.right - dst_rect.left, 0);
5063     glEnd();
5064     checkGLcall("glEnd and previous");
5065
5066     if (texture_target != dst_surface->texture_target)
5067     {
5068         glDisable(texture_target);
5069         glEnable(dst_surface->texture_target);
5070         texture_target = dst_surface->texture_target;
5071     }
5072
5073     /* Now read the stretched and upside down image into the destination texture */
5074     context_bind_texture(context, texture_target, dst_surface->texture_name);
5075     glCopyTexSubImage2D(texture_target,
5076                         0,
5077                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5078                         0, 0, /* We blitted the image to the origin */
5079                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5080     checkGLcall("glCopyTexSubImage2D");
5081
5082     if(drawBuffer == GL_BACK) {
5083         /* Write the back buffer backup back */
5084         if(backup) {
5085             if(texture_target != GL_TEXTURE_2D) {
5086                 glDisable(texture_target);
5087                 glEnable(GL_TEXTURE_2D);
5088                 texture_target = GL_TEXTURE_2D;
5089             }
5090             context_bind_texture(context, GL_TEXTURE_2D, backup);
5091         }
5092         else
5093         {
5094             if (texture_target != src_surface->texture_target)
5095             {
5096                 glDisable(texture_target);
5097                 glEnable(src_surface->texture_target);
5098                 texture_target = src_surface->texture_target;
5099             }
5100             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5101         }
5102
             /* NOTE(review): these coordinates are always divided by the pow2
              * size, unlike the SFLAG_NORMCOORD-conditional scaling used for the
              * stretched quad above - confirm this is intended for texture
              * targets that do not use normalized coordinates. */
5103         glBegin(GL_QUADS);
5104             /* top left */
5105             glTexCoord2f(0.0f, 0.0f);
5106             glVertex2i(0, fbheight);
5107
5108             /* bottom left */
5109             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5110             glVertex2i(0, 0);
5111
5112             /* bottom right */
5113             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5114                     (float)fbheight / (float)src_surface->pow2Height);
5115             glVertex2i(fbwidth, 0);
5116
5117             /* top right */
5118             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5119             glVertex2i(fbwidth, fbheight);
5120         glEnd();
5121     }
5122     glDisable(texture_target);
5123     checkGLcall("glDisable(texture_target)");
5124
5125     /* Cleanup */
         /* src is only a texture of its own when it was generated for the
          * GL_FRONT read above; otherwise it aliases backup or the surface's
          * texture and must not be deleted here. */
5126     if (src != src_surface->texture_name && src != backup)
5127     {
5128         glDeleteTextures(1, &src);
5129         checkGLcall("glDeleteTextures(1, &src)");
5130     }
5131     if(backup) {
5132         glDeleteTextures(1, &backup);
5133         checkGLcall("glDeleteTextures(1, &backup)");
5134     }
5135
5136     LEAVE_GL();
5137
5138     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5139
5140     context_release(context);
5141
5142     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5143      * path is never entered
5144      */
5145     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5146 }
5147
5148 /* Front buffer coordinates are always full screen coordinates, but our GL
5149  * drawable is limited to the window's client area. The sysmem and texture
5150  * copies do have the full screen size. Note that GL has a bottom-left
5151  * origin, while D3D has a top-left origin. */
5152 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5153 {
5154     UINT drawable_height;
5155
5156     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5157             && surface == surface->container.u.swapchain->front_buffer)
5158     {
5159         POINT offset = {0, 0};
5160         RECT windowsize;
5161
5162         ScreenToClient(window, &offset);
5163         OffsetRect(rect, offset.x, offset.y);
5164
5165         GetClientRect(window, &windowsize);
5166         drawable_height = windowsize.bottom - windowsize.top;
5167     }
5168     else
5169     {
5170         drawable_height = surface->resource.height;
5171     }
5172
5173     rect->top = drawable_height - rect->top;
5174     rect->bottom = drawable_height - rect->bottom;
5175 }
5176
5177 static void surface_blt_to_drawable(struct wined3d_device *device,
5178         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5179         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5180         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5181 {
5182     struct wined3d_context *context;
5183     RECT src_rect, dst_rect;
5184
5185     src_rect = *src_rect_in;
5186     dst_rect = *dst_rect_in;
5187
5188     /* Make sure the surface is up-to-date. This should probably use
5189      * surface_load_location() and worry about the destination surface too,
5190      * unless we're overwriting it completely. */
5191     surface_internal_preload(src_surface, SRGB_RGB);
5192
5193     /* Activate the destination context, set it up for blitting */
5194     context = context_acquire(device, dst_surface);
5195     context_apply_blit_state(context, device);
5196
5197     if (!surface_is_offscreen(dst_surface))
5198         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5199
5200     device->blitter->set_shader(device->blit_priv, context, src_surface);
5201
5202     ENTER_GL();
5203
5204     if (color_key)
5205     {
5206         glEnable(GL_ALPHA_TEST);
5207         checkGLcall("glEnable(GL_ALPHA_TEST)");
5208
5209         /* When the primary render target uses P8, the alpha component
5210          * contains the palette index. Which means that the colorkey is one of
5211          * the palette entries. In other cases pixels that should be masked
5212          * away have alpha set to 0. */
5213         if (primary_render_target_is_p8(device))
5214             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5215         else
5216             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5217         checkGLcall("glAlphaFunc");
5218     }
5219     else
5220     {
5221         glDisable(GL_ALPHA_TEST);
5222         checkGLcall("glDisable(GL_ALPHA_TEST)");
5223     }
5224
5225     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5226
5227     if (color_key)
5228     {
5229         glDisable(GL_ALPHA_TEST);
5230         checkGLcall("glDisable(GL_ALPHA_TEST)");
5231     }
5232
5233     LEAVE_GL();
5234
5235     /* Leave the opengl state valid for blitting */
5236     device->blitter->unset_shader(context->gl_info);
5237
5238     if (wined3d_settings.strict_draw_ordering
5239             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5240             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5241         wglFlush(); /* Flush to ensure ordering across contexts. */
5242
5243     context_release(context);
5244 }
5245
5246 /* Do not call while under the GL lock. */
5247 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5248 {
5249     struct wined3d_device *device = s->resource.device;
5250     const struct blit_shader *blitter;
5251
5252     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5253             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5254     if (!blitter)
5255     {
5256         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5257         return WINED3DERR_INVALIDCALL;
5258     }
5259
5260     return blitter->color_fill(device, s, rect, color);
5261 }
5262
5263 /* Do not call while under the GL lock. */
5264 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5265         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5266         WINED3DTEXTUREFILTERTYPE Filter)
5267 {
5268     struct wined3d_device *device = dst_surface->resource.device;
5269     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5270     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5271
5272     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5273             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5274             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5275
5276     /* Get the swapchain. One of the surfaces has to be a primary surface */
5277     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5278     {
5279         WARN("Destination is in sysmem, rejecting gl blt\n");
5280         return WINED3DERR_INVALIDCALL;
5281     }
5282
5283     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5284         dstSwapchain = dst_surface->container.u.swapchain;
5285
5286     if (src_surface)
5287     {
5288         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5289         {
5290             WARN("Src is in sysmem, rejecting gl blt\n");
5291             return WINED3DERR_INVALIDCALL;
5292         }
5293
5294         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5295             srcSwapchain = src_surface->container.u.swapchain;
5296     }
5297
5298     /* Early sort out of cases where no render target is used */
5299     if (!dstSwapchain && !srcSwapchain
5300             && src_surface != device->fb.render_targets[0]
5301             && dst_surface != device->fb.render_targets[0])
5302     {
5303         TRACE("No surface is render target, not using hardware blit.\n");
5304         return WINED3DERR_INVALIDCALL;
5305     }
5306
5307     /* No destination color keying supported */
5308     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5309     {
5310         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5311         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5312         return WINED3DERR_INVALIDCALL;
5313     }
5314
5315     if (dstSwapchain && dstSwapchain == srcSwapchain)
5316     {
5317         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5318         return WINED3DERR_INVALIDCALL;
5319     }
5320
5321     if (dstSwapchain && srcSwapchain)
5322     {
5323         FIXME("Implement hardware blit between two different swapchains\n");
5324         return WINED3DERR_INVALIDCALL;
5325     }
5326
5327     if (dstSwapchain)
5328     {
5329         /* Handled with regular texture -> swapchain blit */
5330         if (src_surface == device->fb.render_targets[0])
5331             TRACE("Blit from active render target to a swapchain\n");
5332     }
5333     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5334     {
5335         FIXME("Implement blit from a swapchain to the active render target\n");
5336         return WINED3DERR_INVALIDCALL;
5337     }
5338
5339     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5340     {
5341         /* Blit from render target to texture */
5342         BOOL stretchx;
5343
5344         /* P8 read back is not implemented */
5345         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5346                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5347         {
5348             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5349             return WINED3DERR_INVALIDCALL;
5350         }
5351
5352         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5353         {
5354             TRACE("Color keying not supported by frame buffer to texture blit\n");
5355             return WINED3DERR_INVALIDCALL;
5356             /* Destination color key is checked above */
5357         }
5358
5359         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5360             stretchx = TRUE;
5361         else
5362             stretchx = FALSE;
5363
5364         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5365          * flip the image nor scale it.
5366          *
5367          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5368          * -> If the app wants a image width an unscaled width, copy it line per line
5369          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5370          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5371          *    back buffer. This is slower than reading line per line, thus not used for flipping
5372          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5373          *    pixel by pixel. */
5374         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5375                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5376         {
5377             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5378             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5379         } else {
5380             TRACE("Using hardware stretching to flip / stretch the texture\n");
5381             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5382         }
5383
5384         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5385         {
5386             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5387             dst_surface->resource.allocatedMemory = NULL;
5388             dst_surface->resource.heapMemory = NULL;
5389         }
5390         else
5391         {
5392             dst_surface->flags &= ~SFLAG_INSYSMEM;
5393         }
5394
5395         return WINED3D_OK;
5396     }
5397     else if (src_surface)
5398     {
5399         /* Blit from offscreen surface to render target */
5400         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5401         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5402
5403         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5404
5405         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5406                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5407                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5408         {
5409             FIXME("Unsupported blit operation falling back to software\n");
5410             return WINED3DERR_INVALIDCALL;
5411         }
5412
5413         /* Color keying: Check if we have to do a color keyed blt,
5414          * and if not check if a color key is activated.
5415          *
5416          * Just modify the color keying parameters in the surface and restore them afterwards
5417          * The surface keeps track of the color key last used to load the opengl surface.
5418          * PreLoad will catch the change to the flags and color key and reload if necessary.
5419          */
5420         if (flags & WINEDDBLT_KEYSRC)
5421         {
5422             /* Use color key from surface */
5423         }
5424         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5425         {
5426             /* Use color key from DDBltFx */
5427             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5428             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5429         }
5430         else
5431         {
5432             /* Do not use color key */
5433             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5434         }
5435
5436         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5437                 src_surface, src_rect, dst_surface, dst_rect);
5438
5439         /* Restore the color key parameters */
5440         src_surface->CKeyFlags = oldCKeyFlags;
5441         src_surface->SrcBltCKey = oldBltCKey;
5442
5443         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5444
5445         return WINED3D_OK;
5446     }
5447
5448     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5449     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5450     return WINED3DERR_INVALIDCALL;
5451 }
5452
5453 /* GL locking is done by the caller */
5454 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5455         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5456 {
5457     struct wined3d_device *device = surface->resource.device;
5458     const struct wined3d_gl_info *gl_info = context->gl_info;
5459     GLint compare_mode = GL_NONE;
5460     struct blt_info info;
5461     GLint old_binding = 0;
5462     RECT rect;
5463
5464     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5465
5466     glDisable(GL_CULL_FACE);
5467     glDisable(GL_BLEND);
5468     glDisable(GL_ALPHA_TEST);
5469     glDisable(GL_SCISSOR_TEST);
5470     glDisable(GL_STENCIL_TEST);
5471     glEnable(GL_DEPTH_TEST);
5472     glDepthFunc(GL_ALWAYS);
5473     glDepthMask(GL_TRUE);
5474     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5475     glViewport(x, y, w, h);
5476
5477     SetRect(&rect, 0, h, w, 0);
5478     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5479     context_active_texture(context, context->gl_info, 0);
5480     glGetIntegerv(info.binding, &old_binding);
5481     glBindTexture(info.bind_target, texture);
5482     if (gl_info->supported[ARB_SHADOW])
5483     {
5484         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5485         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5486     }
5487
5488     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5489             gl_info, info.tex_type, &surface->ds_current_size);
5490
5491     glBegin(GL_TRIANGLE_STRIP);
5492     glTexCoord3fv(info.coords[0]);
5493     glVertex2f(-1.0f, -1.0f);
5494     glTexCoord3fv(info.coords[1]);
5495     glVertex2f(1.0f, -1.0f);
5496     glTexCoord3fv(info.coords[2]);
5497     glVertex2f(-1.0f, 1.0f);
5498     glTexCoord3fv(info.coords[3]);
5499     glVertex2f(1.0f, 1.0f);
5500     glEnd();
5501
5502     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5503     glBindTexture(info.bind_target, old_binding);
5504
5505     glPopAttrib();
5506
5507     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5508 }
5509
5510 void surface_modify_ds_location(struct wined3d_surface *surface,
5511         DWORD location, UINT w, UINT h)
5512 {
5513     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5514
5515     if (location & ~SFLAG_DS_LOCATIONS)
5516         FIXME("Invalid location (%#x) specified.\n", location);
5517
5518     surface->ds_current_size.cx = w;
5519     surface->ds_current_size.cy = h;
5520     surface->flags &= ~SFLAG_DS_LOCATIONS;
5521     surface->flags |= location;
5522 }
5523
5524 /* Context activation is done by the caller. */
5525 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5526 {
5527     struct wined3d_device *device = surface->resource.device;
5528     GLsizei w, h;
5529
5530     TRACE("surface %p, new location %#x.\n", surface, location);
5531
5532     /* TODO: Make this work for modes other than FBO */
5533     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5534
5535     if (!(surface->flags & location))
5536     {
5537         w = surface->ds_current_size.cx;
5538         h = surface->ds_current_size.cy;
5539         surface->ds_current_size.cx = 0;
5540         surface->ds_current_size.cy = 0;
5541     }
5542     else
5543     {
5544         w = surface->resource.width;
5545         h = surface->resource.height;
5546     }
5547
5548     if (surface->ds_current_size.cx == surface->resource.width
5549             && surface->ds_current_size.cy == surface->resource.height)
5550     {
5551         TRACE("Location (%#x) is already up to date.\n", location);
5552         return;
5553     }
5554
5555     if (surface->current_renderbuffer)
5556     {
5557         FIXME("Not supported with fixed up depth stencil.\n");
5558         return;
5559     }
5560
5561     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5562     {
5563         /* This mostly happens when a depth / stencil is used without being
5564          * cleared first. In principle we could upload from sysmem, or
5565          * explicitly clear before first usage. For the moment there don't
5566          * appear to be a lot of applications depending on this, so a FIXME
5567          * should do. */
5568         FIXME("No up to date depth stencil location.\n");
5569         surface->flags |= location;
5570         surface->ds_current_size.cx = surface->resource.width;
5571         surface->ds_current_size.cy = surface->resource.height;
5572         return;
5573     }
5574
5575     if (location == SFLAG_DS_OFFSCREEN)
5576     {
5577         GLint old_binding = 0;
5578         GLenum bind_target;
5579
5580         /* The render target is allowed to be smaller than the depth/stencil
5581          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5582          * than the offscreen surface. Don't overwrite the offscreen surface
5583          * with undefined data. */
5584         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5585         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5586
5587         TRACE("Copying onscreen depth buffer to depth texture.\n");
5588
5589         ENTER_GL();
5590
5591         if (!device->depth_blt_texture)
5592         {
5593             glGenTextures(1, &device->depth_blt_texture);
5594         }
5595
5596         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5597          * directly on the FBO texture. That's because we need to flip. */
5598         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5599                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5600         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5601         {
5602             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5603             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5604         }
5605         else
5606         {
5607             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5608             bind_target = GL_TEXTURE_2D;
5609         }
5610         glBindTexture(bind_target, device->depth_blt_texture);
5611         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5612          * internal format, because the internal format might include stencil
5613          * data. In principle we should copy stencil data as well, but unless
5614          * the driver supports stencil export it's hard to do, and doesn't
5615          * seem to be needed in practice. If the hardware doesn't support
5616          * writing stencil data, the glCopyTexImage2D() call might trigger
5617          * software fallbacks. */
5618         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5619         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5620         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5621         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5622         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5623         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5624         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5625         glBindTexture(bind_target, old_binding);
5626
5627         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5628                 NULL, surface, SFLAG_INTEXTURE);
5629         context_set_draw_buffer(context, GL_NONE);
5630         glReadBuffer(GL_NONE);
5631
5632         /* Do the actual blit */
5633         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5634         checkGLcall("depth_blt");
5635
5636         context_invalidate_state(context, STATE_FRAMEBUFFER);
5637
5638         LEAVE_GL();
5639
5640         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5641     }
5642     else if (location == SFLAG_DS_ONSCREEN)
5643     {
5644         TRACE("Copying depth texture to onscreen depth buffer.\n");
5645
5646         ENTER_GL();
5647
5648         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5649                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5650         surface_depth_blt(surface, context, surface->texture_name,
5651                 0, surface->pow2Height - h, w, h, surface->texture_target);
5652         checkGLcall("depth_blt");
5653
5654         context_invalidate_state(context, STATE_FRAMEBUFFER);
5655
5656         LEAVE_GL();
5657
5658         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5659     }
5660     else
5661     {
5662         ERR("Invalid location (%#x) specified.\n", location);
5663     }
5664
5665     surface->flags |= location;
5666     surface->ds_current_size.cx = surface->resource.width;
5667     surface->ds_current_size.cy = surface->resource.height;
5668 }
5669
5670 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5671 {
5672     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5673     struct wined3d_surface *overlay;
5674
5675     TRACE("surface %p, location %s, persistent %#x.\n",
5676             surface, debug_surflocation(location), persistent);
5677
5678     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5679             && (location & SFLAG_INDRAWABLE))
5680         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5681
5682     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5683             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5684         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5685
5686     if (persistent)
5687     {
5688         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5689                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5690         {
5691             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5692             {
5693                 TRACE("Passing to container.\n");
5694                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5695             }
5696         }
5697         surface->flags &= ~SFLAG_LOCATIONS;
5698         surface->flags |= location;
5699
5700         /* Redraw emulated overlays, if any */
5701         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5702         {
5703             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5704             {
5705                 overlay->surface_ops->surface_draw_overlay(overlay);
5706             }
5707         }
5708     }
5709     else
5710     {
5711         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5712         {
5713             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5714             {
5715                 TRACE("Passing to container\n");
5716                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5717             }
5718         }
5719         surface->flags &= ~location;
5720     }
5721
5722     if (!(surface->flags & SFLAG_LOCATIONS))
5723     {
5724         ERR("Surface %p does not have any up to date location.\n", surface);
5725     }
5726 }
5727
5728 static DWORD resource_access_from_location(DWORD location)
5729 {
5730     switch (location)
5731     {
5732         case SFLAG_INSYSMEM:
5733             return WINED3D_RESOURCE_ACCESS_CPU;
5734
5735         case SFLAG_INDRAWABLE:
5736         case SFLAG_INSRGBTEX:
5737         case SFLAG_INTEXTURE:
5738         case SFLAG_INRB_MULTISAMPLE:
5739         case SFLAG_INRB_RESOLVED:
5740             return WINED3D_RESOURCE_ACCESS_GPU;
5741
5742         default:
5743             FIXME("Unhandled location %#x.\n", location);
5744             return 0;
5745     }
5746 }
5747
5748 static void surface_load_sysmem(struct wined3d_surface *surface,
5749         const struct wined3d_gl_info *gl_info, const RECT *rect)
5750 {
5751     surface_prepare_system_memory(surface);
5752
5753     /* Download the surface to system memory. */
5754     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5755     {
5756         struct wined3d_device *device = surface->resource.device;
5757         struct wined3d_context *context;
5758
5759         /* TODO: Use already acquired context when possible. */
5760         context = context_acquire(device, NULL);
5761
5762         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5763         surface_download_data(surface, gl_info);
5764
5765         context_release(context);
5766
5767         return;
5768     }
5769
5770     /* Note: It might be faster to download into a texture first. */
5771     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5772             wined3d_surface_get_pitch(surface));
5773 }
5774
5775 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5776         const struct wined3d_gl_info *gl_info, const RECT *rect)
5777 {
5778     struct wined3d_device *device = surface->resource.device;
5779     struct wined3d_format format;
5780     CONVERT_TYPES convert;
5781     UINT byte_count;
5782     BYTE *mem;
5783
5784     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5785     {
5786         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5787         return WINED3DERR_INVALIDCALL;
5788     }
5789
5790     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5791         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5792
5793     if (surface->flags & SFLAG_INTEXTURE)
5794     {
5795         RECT r;
5796
5797         surface_get_rect(surface, rect, &r);
5798         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5799
5800         return WINED3D_OK;
5801     }
5802
5803     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5804     {
5805         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5806          * path through sysmem. */
5807         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5808     }
5809
5810     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5811
5812     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5813      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5814      * called. */
5815     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5816     {
5817         struct wined3d_context *context;
5818
5819         TRACE("Removing the pbo attached to surface %p.\n", surface);
5820
5821         /* TODO: Use already acquired context when possible. */
5822         context = context_acquire(device, NULL);
5823
5824         surface_remove_pbo(surface, gl_info);
5825
5826         context_release(context);
5827     }
5828
5829     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5830     {
5831         UINT height = surface->resource.height;
5832         UINT width = surface->resource.width;
5833         UINT src_pitch, dst_pitch;
5834
5835         byte_count = format.conv_byte_count;
5836         src_pitch = wined3d_surface_get_pitch(surface);
5837
5838         /* Stick to the alignment for the converted surface too, makes it
5839          * easier to load the surface. */
5840         dst_pitch = width * byte_count;
5841         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5842
5843         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5844         {
5845             ERR("Out of memory (%u).\n", dst_pitch * height);
5846             return E_OUTOFMEMORY;
5847         }
5848
5849         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5850                 src_pitch, width, height, dst_pitch, convert, surface);
5851
5852         surface->flags |= SFLAG_CONVERTED;
5853     }
5854     else
5855     {
5856         surface->flags &= ~SFLAG_CONVERTED;
5857         mem = surface->resource.allocatedMemory;
5858         byte_count = format.byte_count;
5859     }
5860
5861     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5862
5863     /* Don't delete PBO memory. */
5864     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5865         HeapFree(GetProcessHeap(), 0, mem);
5866
5867     return WINED3D_OK;
5868 }
5869
5870 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5871         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5872 {
5873     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
5874     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5875     struct wined3d_device *device = surface->resource.device;
5876     struct wined3d_context *context;
5877     UINT width, src_pitch, dst_pitch;
5878     struct wined3d_bo_address data;
5879     struct wined3d_format format;
5880     POINT dst_point = {0, 0};
5881     CONVERT_TYPES convert;
5882     BYTE *mem;
5883
5884     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5885             && surface_is_offscreen(surface)
5886             && (surface->flags & SFLAG_INDRAWABLE))
5887     {
5888         read_from_framebuffer_texture(surface, srgb);
5889
5890         return WINED3D_OK;
5891     }
5892
5893     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5894             && (surface->resource.format->flags & attach_flags) == attach_flags
5895             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5896                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5897                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5898     {
5899         if (srgb)
5900             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5901                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5902         else
5903             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5904                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5905
5906         return WINED3D_OK;
5907     }
5908
5909     /* Upload from system memory */
5910
5911     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
5912             TRUE /* We will use textures */, &format, &convert);
5913
5914     if (srgb)
5915     {
5916         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
5917         {
5918             /* Performance warning... */
5919             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
5920             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5921         }
5922     }
5923     else
5924     {
5925         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
5926         {
5927             /* Performance warning... */
5928             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
5929             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5930         }
5931     }
5932
5933     if (!(surface->flags & SFLAG_INSYSMEM))
5934     {
5935         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
5936         /* Lets hope we get it from somewhere... */
5937         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5938     }
5939
5940     /* TODO: Use already acquired context when possible. */
5941     context = context_acquire(device, NULL);
5942
5943     surface_prepare_texture(surface, context, srgb);
5944     surface_bind_and_dirtify(surface, context, srgb);
5945
5946     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
5947     {
5948         surface->flags |= SFLAG_GLCKEY;
5949         surface->glCKey = surface->SrcBltCKey;
5950     }
5951     else surface->flags &= ~SFLAG_GLCKEY;
5952
5953     width = surface->resource.width;
5954     src_pitch = wined3d_surface_get_pitch(surface);
5955
5956     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5957      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
5958      * called. */
5959     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
5960     {
5961         TRACE("Removing the pbo attached to surface %p.\n", surface);
5962         surface_remove_pbo(surface, gl_info);
5963     }
5964
5965     if (format.convert)
5966     {
5967         /* This code is entered for texture formats which need a fixup. */
5968         UINT height = surface->resource.height;
5969
5970         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
5971         dst_pitch = width * format.conv_byte_count;
5972         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5973
5974         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5975         {
5976             ERR("Out of memory (%u).\n", dst_pitch * height);
5977             context_release(context);
5978             return E_OUTOFMEMORY;
5979         }
5980         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
5981     }
5982     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
5983     {
5984         /* This code is only entered for color keying fixups */
5985         UINT height = surface->resource.height;
5986
5987         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
5988         dst_pitch = width * format.conv_byte_count;
5989         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5990
5991         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5992         {
5993             ERR("Out of memory (%u).\n", dst_pitch * height);
5994             context_release(context);
5995             return E_OUTOFMEMORY;
5996         }
5997         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
5998                 width, height, dst_pitch, convert, surface);
5999     }
6000     else
6001     {
6002         mem = surface->resource.allocatedMemory;
6003     }
6004
6005     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6006     data.addr = mem;
6007     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6008
6009     context_release(context);
6010
6011     /* Don't delete PBO memory. */
6012     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6013         HeapFree(GetProcessHeap(), 0, mem);
6014
6015     return WINED3D_OK;
6016 }
6017
6018 static void surface_multisample_resolve(struct wined3d_surface *surface)
6019 {
6020     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6021
6022     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6023         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6024
6025     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6026             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6027 }
6028
6029 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6030 {
6031     struct wined3d_device *device = surface->resource.device;
6032     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6033     HRESULT hr;
6034
6035     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6036
6037     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6038     {
6039         if (location == SFLAG_INTEXTURE)
6040         {
6041             struct wined3d_context *context = context_acquire(device, NULL);
6042             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6043             context_release(context);
6044             return WINED3D_OK;
6045         }
6046         else
6047         {
6048             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6049             return WINED3DERR_INVALIDCALL;
6050         }
6051     }
6052
6053     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6054         location = SFLAG_INTEXTURE;
6055
6056     if (surface->flags & location)
6057     {
6058         TRACE("Location already up to date.\n");
6059         return WINED3D_OK;
6060     }
6061
6062     if (WARN_ON(d3d_surface))
6063     {
6064         DWORD required_access = resource_access_from_location(location);
6065         if ((surface->resource.access_flags & required_access) != required_access)
6066             WARN("Operation requires %#x access, but surface only has %#x.\n",
6067                     required_access, surface->resource.access_flags);
6068     }
6069
6070     if (!(surface->flags & SFLAG_LOCATIONS))
6071     {
6072         ERR("Surface %p does not have any up to date location.\n", surface);
6073         surface->flags |= SFLAG_LOST;
6074         return WINED3DERR_DEVICELOST;
6075     }
6076
6077     switch (location)
6078     {
6079         case SFLAG_INSYSMEM:
6080             surface_load_sysmem(surface, gl_info, rect);
6081             break;
6082
6083         case SFLAG_INDRAWABLE:
6084             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6085                 return hr;
6086             break;
6087
6088         case SFLAG_INRB_RESOLVED:
6089             surface_multisample_resolve(surface);
6090             break;
6091
6092         case SFLAG_INTEXTURE:
6093         case SFLAG_INSRGBTEX:
6094             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6095                 return hr;
6096             break;
6097
6098         default:
6099             ERR("Don't know how to handle location %#x.\n", location);
6100             break;
6101     }
6102
6103     if (!rect)
6104     {
6105         surface->flags |= location;
6106
6107         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6108             surface_evict_sysmem(surface);
6109     }
6110
6111     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6112             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6113     {
6114         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6115     }
6116
6117     return WINED3D_OK;
6118 }
6119
6120 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6121 {
6122     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6123
6124     /* Not on a swapchain - must be offscreen */
6125     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6126
6127     /* The front buffer is always onscreen */
6128     if (surface == swapchain->front_buffer) return FALSE;
6129
6130     /* If the swapchain is rendered to an FBO, the backbuffer is
6131      * offscreen, otherwise onscreen */
6132     return swapchain->render_to_fbo;
6133 }
6134
6135 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); nothing to release.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6138
6139 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6140 /* Context activation is done by the caller. */
6141 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6142 {
6143     BYTE table[256][4];
6144     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6145
6146     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6147
6148     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6149     ENTER_GL();
6150     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6151     LEAVE_GL();
6152 }
6153
6154 /* Context activation is done by the caller. */
6155 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6156 {
6157     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6158
6159     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6160      * else the surface is converted in software at upload time in LoadLocation.
6161      */
6162     if(fixup == COMPLEX_FIXUP_P8 && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6163         ffp_blit_p8_upload_palette(surface, context->gl_info);
6164
6165     ENTER_GL();
6166     glEnable(surface->texture_target);
6167     checkGLcall("glEnable(surface->texture_target)");
6168     LEAVE_GL();
6169     return WINED3D_OK;
6170 }
6171
6172 /* Context activation is done by the caller. */
6173 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6174 {
6175     ENTER_GL();
6176     glDisable(GL_TEXTURE_2D);
6177     checkGLcall("glDisable(GL_TEXTURE_2D)");
6178     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6179     {
6180         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6181         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6182     }
6183     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6184     {
6185         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6186         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6187     }
6188     LEAVE_GL();
6189 }
6190
6191 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6192         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6193         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6194 {
6195     enum complex_fixup src_fixup;
6196
6197     switch (blit_op)
6198     {
6199         case WINED3D_BLIT_OP_COLOR_BLIT:
6200             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6201                 return FALSE;
6202
6203             src_fixup = get_complex_fixup(src_format->color_fixup);
6204             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6205             {
6206                 TRACE("Checking support for fixup:\n");
6207                 dump_color_fixup_desc(src_format->color_fixup);
6208             }
6209
6210             if (!is_identity_fixup(dst_format->color_fixup))
6211             {
6212                 TRACE("Destination fixups are not supported\n");
6213                 return FALSE;
6214             }
6215
6216             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6217             {
6218                 TRACE("P8 fixup supported\n");
6219                 return TRUE;
6220             }
6221
6222             /* We only support identity conversions. */
6223             if (is_identity_fixup(src_format->color_fixup))
6224             {
6225                 TRACE("[OK]\n");
6226                 return TRUE;
6227             }
6228
6229             TRACE("[FAILED]\n");
6230             return FALSE;
6231
6232         case WINED3D_BLIT_OP_COLOR_FILL:
6233             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6234                 return FALSE;
6235
6236             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6237             {
6238                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6239                     return FALSE;
6240             }
6241             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6242             {
6243                 TRACE("Color fill not supported\n");
6244                 return FALSE;
6245             }
6246
6247             /* FIXME: We should reject color fills on formats with fixups,
6248              * but this would break P8 color fills for example. */
6249
6250             return TRUE;
6251
6252         case WINED3D_BLIT_OP_DEPTH_FILL:
6253             return TRUE;
6254
6255         default:
6256             TRACE("Unsupported blit_op=%d\n", blit_op);
6257             return FALSE;
6258     }
6259 }
6260
6261 /* Do not call while under the GL lock. */
6262 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6263         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6264 {
6265     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6266     struct wined3d_fb_state fb = {&dst_surface, NULL};
6267
6268     return device_clear_render_targets(device, 1, &fb,
6269             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6270 }
6271
6272 /* Do not call while under the GL lock. */
6273 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6274         struct wined3d_surface *surface, const RECT *rect, float depth)
6275 {
6276     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6277     struct wined3d_fb_state fb = {NULL, surface};
6278
6279     return device_clear_render_targets(device, 0, &fb,
6280             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6281 }
6282
6283 const struct blit_shader ffp_blit =  {
6284     ffp_blit_alloc,
6285     ffp_blit_free,
6286     ffp_blit_set,
6287     ffp_blit_unset,
6288     ffp_blit_supported,
6289     ffp_blit_color_fill,
6290     ffp_blit_depth_fill,
6291 };
6292
6293 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6294 {
6295     return WINED3D_OK;
6296 }
6297
/* Counterpart of cpu_blit_alloc(); nothing to release.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6302
6303 /* Context activation is done by the caller. */
6304 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6305 {
6306     return WINED3D_OK;
6307 }
6308
/* Counterpart of cpu_blit_set(); nothing to undo.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6313
6314 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6315         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6316         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6317 {
6318     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6319     {
6320         return TRUE;
6321     }
6322
6323     return FALSE;
6324 }
6325
6326 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6327         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6328         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6329 {
6330     UINT row_block_count;
6331     const BYTE *src_row;
6332     BYTE *dst_row;
6333     UINT x, y;
6334
6335     src_row = src_data;
6336     dst_row = dst_data;
6337
6338     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6339
6340     if (!flags)
6341     {
6342         for (y = 0; y < update_h; y += format->block_height)
6343         {
6344             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6345             src_row += src_pitch;
6346             dst_row += dst_pitch;
6347         }
6348
6349         return WINED3D_OK;
6350     }
6351
6352     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6353     {
6354         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6355
6356         switch (format->id)
6357         {
6358             case WINED3DFMT_DXT1:
6359                 for (y = 0; y < update_h; y += format->block_height)
6360                 {
6361                     struct block
6362                     {
6363                         WORD color[2];
6364                         BYTE control_row[4];
6365                     };
6366
6367                     const struct block *s = (const struct block *)src_row;
6368                     struct block *d = (struct block *)dst_row;
6369
6370                     for (x = 0; x < row_block_count; ++x)
6371                     {
6372                         d[x].color[0] = s[x].color[0];
6373                         d[x].color[1] = s[x].color[1];
6374                         d[x].control_row[0] = s[x].control_row[3];
6375                         d[x].control_row[1] = s[x].control_row[2];
6376                         d[x].control_row[2] = s[x].control_row[1];
6377                         d[x].control_row[3] = s[x].control_row[0];
6378                     }
6379                     src_row -= src_pitch;
6380                     dst_row += dst_pitch;
6381                 }
6382                 return WINED3D_OK;
6383
6384             case WINED3DFMT_DXT3:
6385                 for (y = 0; y < update_h; y += format->block_height)
6386                 {
6387                     struct block
6388                     {
6389                         WORD alpha_row[4];
6390                         WORD color[2];
6391                         BYTE control_row[4];
6392                     };
6393
6394                     const struct block *s = (const struct block *)src_row;
6395                     struct block *d = (struct block *)dst_row;
6396
6397                     for (x = 0; x < row_block_count; ++x)
6398                     {
6399                         d[x].alpha_row[0] = s[x].alpha_row[3];
6400                         d[x].alpha_row[1] = s[x].alpha_row[2];
6401                         d[x].alpha_row[2] = s[x].alpha_row[1];
6402                         d[x].alpha_row[3] = s[x].alpha_row[0];
6403                         d[x].color[0] = s[x].color[0];
6404                         d[x].color[1] = s[x].color[1];
6405                         d[x].control_row[0] = s[x].control_row[3];
6406                         d[x].control_row[1] = s[x].control_row[2];
6407                         d[x].control_row[2] = s[x].control_row[1];
6408                         d[x].control_row[3] = s[x].control_row[0];
6409                     }
6410                     src_row -= src_pitch;
6411                     dst_row += dst_pitch;
6412                 }
6413                 return WINED3D_OK;
6414
6415             default:
6416                 FIXME("Compressed flip not implemented for format %s.\n",
6417                         debug_d3dformat(format->id));
6418                 return E_NOTIMPL;
6419         }
6420     }
6421
6422     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6423             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6424
6425     return E_NOTIMPL;
6426 }
6427
6428 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6429         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6430         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6431 {
6432     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6433     const struct wined3d_format *src_format, *dst_format;
6434     struct wined3d_surface *orig_src = src_surface;
6435     WINED3DLOCKED_RECT dlock, slock;
6436     HRESULT hr = WINED3D_OK;
6437     const BYTE *sbuf;
6438     RECT xdst,xsrc;
6439     BYTE *dbuf;
6440     int x, y;
6441
6442     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6443             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6444             flags, fx, debug_d3dtexturefiltertype(filter));
6445
6446     xsrc = *src_rect;
6447
6448     if (!src_surface)
6449     {
6450         RECT full_rect;
6451
6452         full_rect.left = 0;
6453         full_rect.top = 0;
6454         full_rect.right = dst_surface->resource.width;
6455         full_rect.bottom = dst_surface->resource.height;
6456         IntersectRect(&xdst, &full_rect, dst_rect);
6457     }
6458     else
6459     {
6460         BOOL clip_horiz, clip_vert;
6461
6462         xdst = *dst_rect;
6463         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6464         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6465
6466         if (clip_vert || clip_horiz)
6467         {
6468             /* Now check if this is a special case or not... */
6469             if ((flags & WINEDDBLT_DDFX)
6470                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6471                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6472             {
6473                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6474                 return WINED3D_OK;
6475             }
6476
6477             if (clip_horiz)
6478             {
6479                 if (xdst.left < 0)
6480                 {
6481                     xsrc.left -= xdst.left;
6482                     xdst.left = 0;
6483                 }
6484                 if (xdst.right > dst_surface->resource.width)
6485                 {
6486                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6487                     xdst.right = (int)dst_surface->resource.width;
6488                 }
6489             }
6490
6491             if (clip_vert)
6492             {
6493                 if (xdst.top < 0)
6494                 {
6495                     xsrc.top -= xdst.top;
6496                     xdst.top = 0;
6497                 }
6498                 if (xdst.bottom > dst_surface->resource.height)
6499                 {
6500                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6501                     xdst.bottom = (int)dst_surface->resource.height;
6502                 }
6503             }
6504
6505             /* And check if after clipping something is still to be done... */
6506             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6507                     || (xdst.left >= (int)dst_surface->resource.width)
6508                     || (xdst.top >= (int)dst_surface->resource.height)
6509                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6510                     || (xsrc.left >= (int)src_surface->resource.width)
6511                     || (xsrc.top >= (int)src_surface->resource.height))
6512             {
6513                 TRACE("Nothing to be done after clipping.\n");
6514                 return WINED3D_OK;
6515             }
6516         }
6517     }
6518
6519     if (src_surface == dst_surface)
6520     {
6521         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6522         slock = dlock;
6523         src_format = dst_surface->resource.format;
6524         dst_format = src_format;
6525     }
6526     else
6527     {
6528         dst_format = dst_surface->resource.format;
6529         if (src_surface)
6530         {
6531             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6532             {
6533                 src_surface = surface_convert_format(src_surface, dst_format->id);
6534                 if (!src_surface)
6535                 {
6536                     /* The conv function writes a FIXME */
6537                     WARN("Cannot convert source surface format to dest format.\n");
6538                     goto release;
6539                 }
6540             }
6541             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6542             src_format = src_surface->resource.format;
6543         }
6544         else
6545         {
6546             src_format = dst_format;
6547         }
6548         if (dst_rect)
6549             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6550         else
6551             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6552     }
6553
6554     bpp = dst_surface->resource.format->byte_count;
6555     srcheight = xsrc.bottom - xsrc.top;
6556     srcwidth = xsrc.right - xsrc.left;
6557     dstheight = xdst.bottom - xdst.top;
6558     dstwidth = xdst.right - xdst.left;
6559     width = (xdst.right - xdst.left) * bpp;
6560
6561     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6562     {
6563         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6564
6565         if (src_surface == dst_surface)
6566         {
6567             FIXME("Only plain blits supported on compressed surfaces.\n");
6568             hr = E_NOTIMPL;
6569             goto release;
6570         }
6571
6572         if (srcheight != dstheight || srcwidth != dstwidth)
6573         {
6574             WARN("Stretching not supported on compressed surfaces.\n");
6575             hr = WINED3DERR_INVALIDCALL;
6576             goto release;
6577         }
6578
6579         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6580         {
6581             WARN("Rectangle not block-aligned.\n");
6582             hr = WINED3DERR_INVALIDCALL;
6583             goto release;
6584         }
6585
6586         hr = surface_cpu_blt_compressed(slock.pBits, dlock.pBits,
6587                 slock.Pitch, dlock.Pitch, dstwidth, dstheight,
6588                 src_format, flags, fx);
6589         goto release;
6590     }
6591
6592     if (dst_rect && src_surface != dst_surface)
6593         dbuf = dlock.pBits;
6594     else
6595         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6596
6597     /* First, all the 'source-less' blits */
6598     if (flags & WINEDDBLT_COLORFILL)
6599     {
6600         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6601         flags &= ~WINEDDBLT_COLORFILL;
6602     }
6603
6604     if (flags & WINEDDBLT_DEPTHFILL)
6605     {
6606         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6607     }
6608     if (flags & WINEDDBLT_ROP)
6609     {
6610         /* Catch some degenerate cases here. */
6611         switch (fx->dwROP)
6612         {
6613             case BLACKNESS:
6614                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6615                 break;
6616             case 0xAA0029: /* No-op */
6617                 break;
6618             case WHITENESS:
6619                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6620                 break;
6621             case SRCCOPY: /* Well, we do that below? */
6622                 break;
6623             default:
6624                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6625                 goto error;
6626         }
6627         flags &= ~WINEDDBLT_ROP;
6628     }
6629     if (flags & WINEDDBLT_DDROPS)
6630     {
6631         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6632     }
6633     /* Now the 'with source' blits. */
6634     if (src_surface)
6635     {
6636         const BYTE *sbase;
6637         int sx, xinc, sy, yinc;
6638
6639         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6640             goto release;
6641
6642         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6643                 && (srcwidth != dstwidth || srcheight != dstheight))
6644         {
6645             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6646             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6647         }
6648
6649         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6650         xinc = (srcwidth << 16) / dstwidth;
6651         yinc = (srcheight << 16) / dstheight;
6652
6653         if (!flags)
6654         {
6655             /* No effects, we can cheat here. */
6656             if (dstwidth == srcwidth)
6657             {
6658                 if (dstheight == srcheight)
6659                 {
6660                     /* No stretching in either direction. This needs to be as
6661                      * fast as possible. */
6662                     sbuf = sbase;
6663
6664                     /* Check for overlapping surfaces. */
6665                     if (src_surface != dst_surface || xdst.top < xsrc.top
6666                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6667                     {
6668                         /* No overlap, or dst above src, so copy from top downwards. */
6669                         for (y = 0; y < dstheight; ++y)
6670                         {
6671                             memcpy(dbuf, sbuf, width);
6672                             sbuf += slock.Pitch;
6673                             dbuf += dlock.Pitch;
6674                         }
6675                     }
6676                     else if (xdst.top > xsrc.top)
6677                     {
6678                         /* Copy from bottom upwards. */
6679                         sbuf += (slock.Pitch*dstheight);
6680                         dbuf += (dlock.Pitch*dstheight);
6681                         for (y = 0; y < dstheight; ++y)
6682                         {
6683                             sbuf -= slock.Pitch;
6684                             dbuf -= dlock.Pitch;
6685                             memcpy(dbuf, sbuf, width);
6686                         }
6687                     }
6688                     else
6689                     {
6690                         /* Src and dst overlapping on the same line, use memmove. */
6691                         for (y = 0; y < dstheight; ++y)
6692                         {
6693                             memmove(dbuf, sbuf, width);
6694                             sbuf += slock.Pitch;
6695                             dbuf += dlock.Pitch;
6696                         }
6697                     }
6698                 }
6699                 else
6700                 {
6701                     /* Stretching in y direction only. */
6702                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6703                     {
6704                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6705                         memcpy(dbuf, sbuf, width);
6706                         dbuf += dlock.Pitch;
6707                     }
6708                 }
6709             }
6710             else
6711             {
6712                 /* Stretching in X direction. */
6713                 int last_sy = -1;
6714                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6715                 {
6716                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6717
6718                     if ((sy >> 16) == (last_sy >> 16))
6719                     {
6720                         /* This source row is the same as last source row -
6721                          * Copy the already stretched row. */
6722                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6723                     }
6724                     else
6725                     {
6726 #define STRETCH_ROW(type) \
6727 do { \
6728     const type *s = (const type *)sbuf; \
6729     type *d = (type *)dbuf; \
6730     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6731         d[x] = s[sx >> 16]; \
6732 } while(0)
6733
6734                         switch(bpp)
6735                         {
6736                             case 1:
6737                                 STRETCH_ROW(BYTE);
6738                                 break;
6739                             case 2:
6740                                 STRETCH_ROW(WORD);
6741                                 break;
6742                             case 4:
6743                                 STRETCH_ROW(DWORD);
6744                                 break;
6745                             case 3:
6746                             {
6747                                 const BYTE *s;
6748                                 BYTE *d = dbuf;
6749                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6750                                 {
6751                                     DWORD pixel;
6752
6753                                     s = sbuf + 3 * (sx >> 16);
6754                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6755                                     d[0] = (pixel      ) & 0xff;
6756                                     d[1] = (pixel >>  8) & 0xff;
6757                                     d[2] = (pixel >> 16) & 0xff;
6758                                     d += 3;
6759                                 }
6760                                 break;
6761                             }
6762                             default:
6763                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6764                                 hr = WINED3DERR_NOTAVAILABLE;
6765                                 goto error;
6766                         }
6767 #undef STRETCH_ROW
6768                     }
6769                     dbuf += dlock.Pitch;
6770                     last_sy = sy;
6771                 }
6772             }
6773         }
6774         else
6775         {
6776             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6777             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6778             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6779             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6780             {
6781                 /* The color keying flags are checked for correctness in ddraw */
6782                 if (flags & WINEDDBLT_KEYSRC)
6783                 {
6784                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6785                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6786                 }
6787                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6788                 {
6789                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6790                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6791                 }
6792
6793                 if (flags & WINEDDBLT_KEYDEST)
6794                 {
6795                     /* Destination color keys are taken from the source surface! */
6796                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6797                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6798                 }
6799                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6800                 {
6801                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6802                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6803                 }
6804
6805                 if (bpp == 1)
6806                 {
6807                     keymask = 0xff;
6808                 }
6809                 else
6810                 {
6811                     keymask = src_format->red_mask
6812                             | src_format->green_mask
6813                             | src_format->blue_mask;
6814                 }
6815                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6816             }
6817
6818             if (flags & WINEDDBLT_DDFX)
6819             {
6820                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6821                 LONG tmpxy;
6822                 dTopLeft     = dbuf;
6823                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6824                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6825                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6826
6827                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6828                 {
6829                     /* I don't think we need to do anything about this flag */
6830                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6831                 }
6832                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6833                 {
6834                     tmp          = dTopRight;
6835                     dTopRight    = dTopLeft;
6836                     dTopLeft     = tmp;
6837                     tmp          = dBottomRight;
6838                     dBottomRight = dBottomLeft;
6839                     dBottomLeft  = tmp;
6840                     dstxinc = dstxinc * -1;
6841                 }
6842                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6843                 {
6844                     tmp          = dTopLeft;
6845                     dTopLeft     = dBottomLeft;
6846                     dBottomLeft  = tmp;
6847                     tmp          = dTopRight;
6848                     dTopRight    = dBottomRight;
6849                     dBottomRight = tmp;
6850                     dstyinc = dstyinc * -1;
6851                 }
6852                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6853                 {
6854                     /* I don't think we need to do anything about this flag */
6855                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6856                 }
6857                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6858                 {
6859                     tmp          = dBottomRight;
6860                     dBottomRight = dTopLeft;
6861                     dTopLeft     = tmp;
6862                     tmp          = dBottomLeft;
6863                     dBottomLeft  = dTopRight;
6864                     dTopRight    = tmp;
6865                     dstxinc = dstxinc * -1;
6866                     dstyinc = dstyinc * -1;
6867                 }
6868                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6869                 {
6870                     tmp          = dTopLeft;
6871                     dTopLeft     = dBottomLeft;
6872                     dBottomLeft  = dBottomRight;
6873                     dBottomRight = dTopRight;
6874                     dTopRight    = tmp;
6875                     tmpxy   = dstxinc;
6876                     dstxinc = dstyinc;
6877                     dstyinc = tmpxy;
6878                     dstxinc = dstxinc * -1;
6879                 }
6880                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6881                 {
6882                     tmp          = dTopLeft;
6883                     dTopLeft     = dTopRight;
6884                     dTopRight    = dBottomRight;
6885                     dBottomRight = dBottomLeft;
6886                     dBottomLeft  = tmp;
6887                     tmpxy   = dstxinc;
6888                     dstxinc = dstyinc;
6889                     dstyinc = tmpxy;
6890                     dstyinc = dstyinc * -1;
6891                 }
6892                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6893                 {
6894                     /* I don't think we need to do anything about this flag */
6895                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6896                 }
6897                 dbuf = dTopLeft;
6898                 flags &= ~(WINEDDBLT_DDFX);
6899             }
6900
6901 #define COPY_COLORKEY_FX(type) \
6902 do { \
6903     const type *s; \
6904     type *d = (type *)dbuf, *dx, tmp; \
6905     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6906     { \
6907         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6908         dx = d; \
6909         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6910         { \
6911             tmp = s[sx >> 16]; \
6912             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6913                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6914             { \
6915                 dx[0] = tmp; \
6916             } \
6917             dx = (type *)(((BYTE *)dx) + dstxinc); \
6918         } \
6919         d = (type *)(((BYTE *)d) + dstyinc); \
6920     } \
6921 } while(0)
6922
6923             switch (bpp)
6924             {
6925                 case 1:
6926                     COPY_COLORKEY_FX(BYTE);
6927                     break;
6928                 case 2:
6929                     COPY_COLORKEY_FX(WORD);
6930                     break;
6931                 case 4:
6932                     COPY_COLORKEY_FX(DWORD);
6933                     break;
6934                 case 3:
6935                 {
6936                     const BYTE *s;
6937                     BYTE *d = dbuf, *dx;
6938                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6939                     {
6940                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6941                         dx = d;
6942                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
6943                         {
6944                             DWORD pixel, dpixel = 0;
6945                             s = sbuf + 3 * (sx>>16);
6946                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6947                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
6948                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
6949                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
6950                             {
6951                                 dx[0] = (pixel      ) & 0xff;
6952                                 dx[1] = (pixel >>  8) & 0xff;
6953                                 dx[2] = (pixel >> 16) & 0xff;
6954                             }
6955                             dx += dstxinc;
6956                         }
6957                         d += dstyinc;
6958                     }
6959                     break;
6960                 }
6961                 default:
6962                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
6963                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
6964                     hr = WINED3DERR_NOTAVAILABLE;
6965                     goto error;
6966 #undef COPY_COLORKEY_FX
6967             }
6968         }
6969     }
6970
6971 error:
6972     if (flags && FIXME_ON(d3d_surface))
6973     {
6974         FIXME("\tUnsupported flags: %#x.\n", flags);
6975     }
6976
6977 release:
6978     wined3d_surface_unmap(dst_surface);
6979     if (src_surface && src_surface != dst_surface)
6980         wined3d_surface_unmap(src_surface);
6981     /* Release the converted surface, if any. */
6982     if (src_surface && src_surface != orig_src)
6983         wined3d_surface_decref(src_surface);
6984
6985     return hr;
6986 }
6987
6988 /* Do not call while under the GL lock. */
6989 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6990         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6991 {
6992     static const RECT src_rect;
6993     WINEDDBLTFX BltFx;
6994
6995     memset(&BltFx, 0, sizeof(BltFx));
6996     BltFx.dwSize = sizeof(BltFx);
6997     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
6998     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
6999             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7000 }
7001
/* Do not call while under the GL lock.
 *
 * "depth fill" entry of the CPU blitter table. Depth filling is not
 * implemented here: the function only prints a FIXME and returns
 * WINED3DERR_INVALIDCALL. None of the parameters are used. */
static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
        struct wined3d_surface *surface, const RECT *rect, float depth)
{
    FIXME("Depth filling not implemented by cpu_blit.\n");
    return WINED3DERR_INVALIDCALL;
}
7009
/* Blitter function table for the CPU (software) path. The initializer is
 * positional, so the order of the entries has to match the member order of
 * struct blit_shader (declared outside this part of the file). */
const struct blit_shader cpu_blit =  {
    cpu_blit_alloc,
    cpu_blit_free,
    cpu_blit_set,
    cpu_blit_unset,
    cpu_blit_supported,
    cpu_blit_color_fill,
    cpu_blit_depth_fill,
};
7019
7020 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7021         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7022         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7023         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7024 {
7025     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7026     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7027     unsigned int resource_size;
7028     HRESULT hr;
7029
7030     if (multisample_quality > 0)
7031     {
7032         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7033         multisample_quality = 0;
7034     }
7035
7036     /* Quick lockable sanity check.
7037      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7038      * this function is too deep to need to care about things like this.
7039      * Levels need to be checked too, since they all affect what can be done. */
7040     switch (pool)
7041     {
7042         case WINED3DPOOL_SCRATCH:
7043             if (!lockable)
7044             {
7045                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7046                         "which are mutually exclusive, setting lockable to TRUE.\n");
7047                 lockable = TRUE;
7048             }
7049             break;
7050
7051         case WINED3DPOOL_SYSTEMMEM:
7052             if (!lockable)
7053                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7054             break;
7055
7056         case WINED3DPOOL_MANAGED:
7057             if (usage & WINED3DUSAGE_DYNAMIC)
7058                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7059             break;
7060
7061         case WINED3DPOOL_DEFAULT:
7062             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7063                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7064             break;
7065
7066         default:
7067             FIXME("Unknown pool %#x.\n", pool);
7068             break;
7069     };
7070
7071     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7072         FIXME("Trying to create a render target that isn't in the default pool.\n");
7073
7074     /* FIXME: Check that the format is supported by the device. */
7075
7076     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7077     if (!resource_size)
7078         return WINED3DERR_INVALIDCALL;
7079
7080     surface->surface_type = surface_type;
7081
7082     switch (surface_type)
7083     {
7084         case SURFACE_OPENGL:
7085             surface->surface_ops = &surface_ops;
7086             break;
7087
7088         case SURFACE_GDI:
7089             surface->surface_ops = &gdi_surface_ops;
7090             break;
7091
7092         default:
7093             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7094             return WINED3DERR_INVALIDCALL;
7095     }
7096
7097     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7098             multisample_type, multisample_quality, usage, pool, width, height, 1,
7099             resource_size, parent, parent_ops, &surface_resource_ops);
7100     if (FAILED(hr))
7101     {
7102         WARN("Failed to initialize resource, returning %#x.\n", hr);
7103         return hr;
7104     }
7105
7106     /* "Standalone" surface. */
7107     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7108
7109     surface->texture_level = level;
7110     list_init(&surface->overlays);
7111
7112     /* Flags */
7113     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7114     if (discard)
7115         surface->flags |= SFLAG_DISCARD;
7116     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7117         surface->flags |= SFLAG_LOCKABLE;
7118     /* I'm not sure if this qualifies as a hack or as an optimization. It
7119      * seems reasonable to assume that lockable render targets will get
7120      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7121      * creation. However, the other reason we want to do this is that several
7122      * ddraw applications access surface memory while the surface isn't
7123      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7124      * future locks prevents these from crashing. */
7125     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7126         surface->flags |= SFLAG_DYNLOCK;
7127
7128     /* Mark the texture as dirty so that it gets loaded first time around. */
7129     surface_add_dirty_rect(surface, NULL);
7130     list_init(&surface->renderbuffers);
7131
7132     TRACE("surface %p, memory %p, size %u\n",
7133             surface, surface->resource.allocatedMemory, surface->resource.size);
7134
7135     /* Call the private setup routine */
7136     hr = surface->surface_ops->surface_private_setup(surface);
7137     if (FAILED(hr))
7138     {
7139         ERR("Private setup failed, returning %#x\n", hr);
7140         surface->surface_ops->surface_cleanup(surface);
7141         return hr;
7142     }
7143
7144     return hr;
7145 }
7146
7147 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7148         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7149         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7150         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7151 {
7152     struct wined3d_surface *object;
7153     HRESULT hr;
7154
7155     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7156             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7157     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7158             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7159     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7160
7161     if (surface_type == SURFACE_OPENGL && !device->adapter)
7162     {
7163         ERR("OpenGL surfaces are not available without OpenGL.\n");
7164         return WINED3DERR_NOTAVAILABLE;
7165     }
7166
7167     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7168     if (!object)
7169     {
7170         ERR("Failed to allocate surface memory.\n");
7171         return WINED3DERR_OUTOFVIDEOMEMORY;
7172     }
7173
7174     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7175             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7176     if (FAILED(hr))
7177     {
7178         WARN("Failed to initialize surface, returning %#x.\n", hr);
7179         HeapFree(GetProcessHeap(), 0, object);
7180         return hr;
7181     }
7182
7183     TRACE("Created surface %p.\n", object);
7184     *surface = object;
7185
7186     return hr;
7187 }