wined3d: Remove a redundant compare in IWineD3DSurfaceImpl_BltOverride().
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
/* Forward declarations of the static blit helpers, so that code appearing
 * before their definitions can call them. */

/* CPU blit from src_rect of src_surface to dst_rect of dst_surface. */
static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
        const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
/* Blit override taking the same arguments as surface_cpu_blt(). */
static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
        WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     TRACE("surface %p.\n", surface);
46
47     if (surface->texture_name || (surface->flags & SFLAG_PBO)
48              || surface->rb_multisample || surface->rb_resolved
49              || !list_empty(&surface->renderbuffers))
50     {
51         struct wined3d_renderbuffer_entry *entry, *entry2;
52         const struct wined3d_gl_info *gl_info;
53         struct wined3d_context *context;
54
55         context = context_acquire(surface->resource.device, NULL);
56         gl_info = context->gl_info;
57
58         ENTER_GL();
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         if (surface->rb_multisample)
73         {
74             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
76         }
77
78         if (surface->rb_resolved)
79         {
80             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
81             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
82         }
83
84         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
85         {
86             TRACE("Deleting renderbuffer %u.\n", entry->id);
87             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
88             HeapFree(GetProcessHeap(), 0, entry);
89         }
90
91         LEAVE_GL();
92
93         context_release(context);
94     }
95
96     if (surface->flags & SFLAG_DIBSECTION)
97     {
98         /* Release the DC. */
99         SelectObject(surface->hDC, surface->dib.holdbitmap);
100         DeleteDC(surface->hDC);
101         /* Release the DIB section. */
102         DeleteObject(surface->dib.DIBsection);
103         surface->dib.bitmap_data = NULL;
104         surface->resource.allocatedMemory = NULL;
105     }
106
107     if (surface->flags & SFLAG_USERPTR)
108         wined3d_surface_set_mem(surface, NULL);
109     if (surface->overlay_dest)
110         list_remove(&surface->overlay_entry);
111
112     HeapFree(GetProcessHeap(), 0, surface->palette9);
113
114     resource_cleanup(&surface->resource);
115 }
116
117 void surface_update_draw_binding(struct wined3d_surface *surface)
118 {
119     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
120         surface->draw_binding = SFLAG_INDRAWABLE;
121     else if (surface->resource.multisample_type)
122         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
123     else
124         surface->draw_binding = SFLAG_INTEXTURE;
125 }
126
127 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
128 {
129     TRACE("surface %p, container %p.\n", surface, container);
130
131     if (!container && type != WINED3D_CONTAINER_NONE)
132         ERR("Setting NULL container of type %#x.\n", type);
133
134     if (type == WINED3D_CONTAINER_SWAPCHAIN)
135     {
136         surface->get_drawable_size = get_drawable_size_swapchain;
137     }
138     else
139     {
140         switch (wined3d_settings.offscreen_rendering_mode)
141         {
142             case ORM_FBO:
143                 surface->get_drawable_size = get_drawable_size_fbo;
144                 break;
145
146             case ORM_BACKBUFFER:
147                 surface->get_drawable_size = get_drawable_size_backbuffer;
148                 break;
149
150             default:
151                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
152                 return;
153         }
154     }
155
156     surface->container.type = type;
157     surface->container.u.base = container;
158     surface_update_draw_binding(surface);
159 }
160
/* Texture binding information and texture coordinates for drawing a
 * textured quad; filled in by surface_get_blt_info(). */
struct blt_info
{
    GLenum binding;         /* GL_TEXTURE_BINDING_* value matching bind_target. */
    GLenum bind_target;     /* Target passed to glEnable() and context_bind_texture(). */
    enum tex_types tex_type;
    GLfloat coords[4][3];   /* One 3-component texture coordinate per quad corner. */
};
168
/* Rectangle with floating point edges; cube_coords_float() stores the
 * left/top/right/bottom edges of a RECT in it after rescaling. */
struct float_rect
{
    float l;
    float t;
    float r;
    float b;
};
176
177 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
178 {
179     f->l = ((r->left * 2.0f) / w) - 1.0f;
180     f->t = ((r->top * 2.0f) / h) - 1.0f;
181     f->r = ((r->right * 2.0f) / w) - 1.0f;
182     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
183 }
184
185 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
186 {
187     GLfloat (*coords)[3] = info->coords;
188     struct float_rect f;
189
190     switch (target)
191     {
192         default:
193             FIXME("Unsupported texture target %#x\n", target);
194             /* Fall back to GL_TEXTURE_2D */
195         case GL_TEXTURE_2D:
196             info->binding = GL_TEXTURE_BINDING_2D;
197             info->bind_target = GL_TEXTURE_2D;
198             info->tex_type = tex_2d;
199             coords[0][0] = (float)rect->left / w;
200             coords[0][1] = (float)rect->top / h;
201             coords[0][2] = 0.0f;
202
203             coords[1][0] = (float)rect->right / w;
204             coords[1][1] = (float)rect->top / h;
205             coords[1][2] = 0.0f;
206
207             coords[2][0] = (float)rect->left / w;
208             coords[2][1] = (float)rect->bottom / h;
209             coords[2][2] = 0.0f;
210
211             coords[3][0] = (float)rect->right / w;
212             coords[3][1] = (float)rect->bottom / h;
213             coords[3][2] = 0.0f;
214             break;
215
216         case GL_TEXTURE_RECTANGLE_ARB:
217             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
218             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
219             info->tex_type = tex_rect;
220             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
221             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
222             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
223             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
224             break;
225
226         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
227             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
228             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
229             info->tex_type = tex_cube;
230             cube_coords_float(rect, w, h, &f);
231
232             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
233             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
234             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
235             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
236             break;
237
238         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
239             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
240             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
241             info->tex_type = tex_cube;
242             cube_coords_float(rect, w, h, &f);
243
244             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
245             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
246             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
247             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
248             break;
249
250         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
251             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
252             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
253             info->tex_type = tex_cube;
254             cube_coords_float(rect, w, h, &f);
255
256             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
257             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
258             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
259             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
260             break;
261
262         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
263             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
264             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
265             info->tex_type = tex_cube;
266             cube_coords_float(rect, w, h, &f);
267
268             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
269             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
270             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
271             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
272             break;
273
274         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
275             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
276             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
277             info->tex_type = tex_cube;
278             cube_coords_float(rect, w, h, &f);
279
280             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
281             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
282             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
283             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
284             break;
285
286         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
287             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
288             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
289             info->tex_type = tex_cube;
290             cube_coords_float(rect, w, h, &f);
291
292             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
293             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
294             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
295             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
296             break;
297     }
298 }
299
300 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
301 {
302     if (rect_in)
303         *rect_out = *rect_in;
304     else
305     {
306         rect_out->left = 0;
307         rect_out->top = 0;
308         rect_out->right = surface->resource.width;
309         rect_out->bottom = surface->resource.height;
310     }
311 }
312
313 /* GL locking and context activation is done by the caller */
314 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
315         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
316 {
317     struct blt_info info;
318
319     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
320
321     glEnable(info.bind_target);
322     checkGLcall("glEnable(bind_target)");
323
324     context_bind_texture(context, info.bind_target, src_surface->texture_name);
325
326     /* Filtering for StretchRect */
327     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
328             wined3d_gl_mag_filter(magLookup, Filter));
329     checkGLcall("glTexParameteri");
330     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
331             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
334     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
335     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
336     checkGLcall("glTexEnvi");
337
338     /* Draw a quad */
339     glBegin(GL_TRIANGLE_STRIP);
340     glTexCoord3fv(info.coords[0]);
341     glVertex2i(dst_rect->left, dst_rect->top);
342
343     glTexCoord3fv(info.coords[1]);
344     glVertex2i(dst_rect->right, dst_rect->top);
345
346     glTexCoord3fv(info.coords[2]);
347     glVertex2i(dst_rect->left, dst_rect->bottom);
348
349     glTexCoord3fv(info.coords[3]);
350     glVertex2i(dst_rect->right, dst_rect->bottom);
351     glEnd();
352
353     /* Unbind the texture */
354     context_bind_texture(context, info.bind_target, 0);
355
356     /* We changed the filtering settings on the texture. Inform the
357      * container about this to get the filters reset properly next draw. */
358     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
359     {
360         struct wined3d_texture *texture = src_surface->container.u.texture;
361         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
362         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
363         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
364     }
365 }
366
367 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
368 {
369     const struct wined3d_format *format = surface->resource.format;
370     SYSTEM_INFO sysInfo;
371     BITMAPINFO *b_info;
372     int extraline = 0;
373     DWORD *masks;
374     UINT usage;
375     HDC dc;
376
377     TRACE("surface %p.\n", surface);
378
379     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
380     {
381         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
382         return WINED3DERR_INVALIDCALL;
383     }
384
385     switch (format->byte_count)
386     {
387         case 2:
388         case 4:
389             /* Allocate extra space to store the RGB bit masks. */
390             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
391             break;
392
393         case 3:
394             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
395             break;
396
397         default:
398             /* Allocate extra space for a palette. */
399             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
400                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
401             break;
402     }
403
404     if (!b_info)
405         return E_OUTOFMEMORY;
406
407     /* Some applications access the surface in via DWORDs, and do not take
408      * the necessary care at the end of the surface. So we need at least
409      * 4 extra bytes at the end of the surface. Check against the page size,
410      * if the last page used for the surface has at least 4 spare bytes we're
411      * safe, otherwise add an extra line to the DIB section. */
412     GetSystemInfo(&sysInfo);
413     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
414     {
415         extraline = 1;
416         TRACE("Adding an extra line to the DIB section.\n");
417     }
418
419     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
420     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
421     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
422     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
423     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
424             * wined3d_surface_get_pitch(surface);
425     b_info->bmiHeader.biPlanes = 1;
426     b_info->bmiHeader.biBitCount = format->byte_count * 8;
427
428     b_info->bmiHeader.biXPelsPerMeter = 0;
429     b_info->bmiHeader.biYPelsPerMeter = 0;
430     b_info->bmiHeader.biClrUsed = 0;
431     b_info->bmiHeader.biClrImportant = 0;
432
433     /* Get the bit masks */
434     masks = (DWORD *)b_info->bmiColors;
435     switch (surface->resource.format->id)
436     {
437         case WINED3DFMT_B8G8R8_UNORM:
438             usage = DIB_RGB_COLORS;
439             b_info->bmiHeader.biCompression = BI_RGB;
440             break;
441
442         case WINED3DFMT_B5G5R5X1_UNORM:
443         case WINED3DFMT_B5G5R5A1_UNORM:
444         case WINED3DFMT_B4G4R4A4_UNORM:
445         case WINED3DFMT_B4G4R4X4_UNORM:
446         case WINED3DFMT_B2G3R3_UNORM:
447         case WINED3DFMT_B2G3R3A8_UNORM:
448         case WINED3DFMT_R10G10B10A2_UNORM:
449         case WINED3DFMT_R8G8B8A8_UNORM:
450         case WINED3DFMT_R8G8B8X8_UNORM:
451         case WINED3DFMT_B10G10R10A2_UNORM:
452         case WINED3DFMT_B5G6R5_UNORM:
453         case WINED3DFMT_R16G16B16A16_UNORM:
454             usage = 0;
455             b_info->bmiHeader.biCompression = BI_BITFIELDS;
456             masks[0] = format->red_mask;
457             masks[1] = format->green_mask;
458             masks[2] = format->blue_mask;
459             break;
460
461         default:
462             /* Don't know palette */
463             b_info->bmiHeader.biCompression = BI_RGB;
464             usage = 0;
465             break;
466     }
467
468     if (!(dc = GetDC(0)))
469     {
470         HeapFree(GetProcessHeap(), 0, b_info);
471         return HRESULT_FROM_WIN32(GetLastError());
472     }
473
474     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
475             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
476             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
477     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
478     ReleaseDC(0, dc);
479
480     if (!surface->dib.DIBsection)
481     {
482         ERR("Failed to create DIB section.\n");
483         HeapFree(GetProcessHeap(), 0, b_info);
484         return HRESULT_FROM_WIN32(GetLastError());
485     }
486
487     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
488     /* Copy the existing surface to the dib section. */
489     if (surface->resource.allocatedMemory)
490     {
491         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
492                 surface->resource.height * wined3d_surface_get_pitch(surface));
493     }
494     else
495     {
496         /* This is to make maps read the GL texture although memory is allocated. */
497         surface->flags &= ~SFLAG_INSYSMEM;
498     }
499     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
500
501     HeapFree(GetProcessHeap(), 0, b_info);
502
503     /* Now allocate a DC. */
504     surface->hDC = CreateCompatibleDC(0);
505     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
506     TRACE("Using wined3d palette %p.\n", surface->palette);
507     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
508
509     surface->flags |= SFLAG_DIBSECTION;
510
511     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
512     surface->resource.heapMemory = NULL;
513
514     return WINED3D_OK;
515 }
516
517 static void surface_prepare_system_memory(struct wined3d_surface *surface)
518 {
519     struct wined3d_device *device = surface->resource.device;
520     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
521
522     TRACE("surface %p.\n", surface);
523
524     /* Performance optimization: Count how often a surface is locked, if it is
525      * locked regularly do not throw away the system memory copy. This avoids
526      * the need to download the surface from OpenGL all the time. The surface
527      * is still downloaded if the OpenGL texture is changed. */
528     if (!(surface->flags & SFLAG_DYNLOCK))
529     {
530         if (++surface->lockCount > MAXLOCKCOUNT)
531         {
532             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
533             surface->flags |= SFLAG_DYNLOCK;
534         }
535     }
536
537     /* Create a PBO for dynamically locked surfaces but don't do it for
538      * converted or NPOT surfaces. Also don't create a PBO for systemmem
539      * surfaces. */
540     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
541             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
542             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
543     {
544         struct wined3d_context *context;
545         GLenum error;
546
547         context = context_acquire(device, NULL);
548         ENTER_GL();
549
550         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
551         error = glGetError();
552         if (!surface->pbo || error != GL_NO_ERROR)
553             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
554
555         TRACE("Binding PBO %u.\n", surface->pbo);
556
557         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
558         checkGLcall("glBindBufferARB");
559
560         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
561                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
562         checkGLcall("glBufferDataARB");
563
564         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
565         checkGLcall("glBindBufferARB");
566
567         /* We don't need the system memory anymore and we can't even use it for PBOs. */
568         if (!(surface->flags & SFLAG_CLIENT))
569         {
570             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
571             surface->resource.heapMemory = NULL;
572         }
573         surface->resource.allocatedMemory = NULL;
574         surface->flags |= SFLAG_PBO;
575         LEAVE_GL();
576         context_release(context);
577     }
578     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
579     {
580         /* Whatever surface we have, make sure that there is memory allocated
581          * for the downloaded copy, or a PBO to map. */
582         if (!surface->resource.heapMemory)
583             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
584
585         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
586                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
587
588         if (surface->flags & SFLAG_INSYSMEM)
589             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
590     }
591 }
592
593 static void surface_evict_sysmem(struct wined3d_surface *surface)
594 {
595     if (surface->flags & SFLAG_DONOTFREE)
596         return;
597
598     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
599     surface->resource.allocatedMemory = NULL;
600     surface->resource.heapMemory = NULL;
601     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
602 }
603
604 /* Context activation is done by the caller. */
605 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
606         struct wined3d_context *context, BOOL srgb)
607 {
608     struct wined3d_device *device = surface->resource.device;
609     DWORD active_sampler;
610
611     /* We don't need a specific texture unit, but after binding the texture
612      * the current unit is dirty. Read the unit back instead of switching to
613      * 0, this avoids messing around with the state manager's GL states. The
614      * current texture unit should always be a valid one.
615      *
616      * To be more specific, this is tricky because we can implicitly be
617      * called from sampler() in state.c. This means we can't touch anything
618      * other than whatever happens to be the currently active texture, or we
619      * would risk marking already applied sampler states dirty again. */
620     active_sampler = device->rev_tex_unit_map[context->active_texture];
621
622     if (active_sampler != WINED3D_UNMAPPED_STAGE)
623         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
624     surface_bind(surface, context, srgb);
625 }
626
627 static void surface_force_reload(struct wined3d_surface *surface)
628 {
629     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
630 }
631
632 static void surface_release_client_storage(struct wined3d_surface *surface)
633 {
634     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
635
636     ENTER_GL();
637     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
638     if (surface->texture_name)
639     {
640         surface_bind_and_dirtify(surface, context, FALSE);
641         glTexImage2D(surface->texture_target, surface->texture_level,
642                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
643     }
644     if (surface->texture_name_srgb)
645     {
646         surface_bind_and_dirtify(surface, context, TRUE);
647         glTexImage2D(surface->texture_target, surface->texture_level,
648                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
649     }
650     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
651     LEAVE_GL();
652
653     context_release(context);
654
655     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
656     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
657     surface_force_reload(surface);
658 }
659
660 static HRESULT surface_private_setup(struct wined3d_surface *surface)
661 {
662     /* TODO: Check against the maximum texture sizes supported by the video card. */
663     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
664     unsigned int pow2Width, pow2Height;
665
666     TRACE("surface %p.\n", surface);
667
668     surface->texture_name = 0;
669     surface->texture_target = GL_TEXTURE_2D;
670
671     /* Non-power2 support */
672     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
673     {
674         pow2Width = surface->resource.width;
675         pow2Height = surface->resource.height;
676     }
677     else
678     {
679         /* Find the nearest pow2 match */
680         pow2Width = pow2Height = 1;
681         while (pow2Width < surface->resource.width)
682             pow2Width <<= 1;
683         while (pow2Height < surface->resource.height)
684             pow2Height <<= 1;
685     }
686     surface->pow2Width = pow2Width;
687     surface->pow2Height = pow2Height;
688
689     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
690     {
691         /* TODO: Add support for non power two compressed textures. */
692         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
693         {
694             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
695                   surface, surface->resource.width, surface->resource.height);
696             return WINED3DERR_NOTAVAILABLE;
697         }
698     }
699
700     if (pow2Width != surface->resource.width
701             || pow2Height != surface->resource.height)
702     {
703         surface->flags |= SFLAG_NONPOW2;
704     }
705
706     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
707             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
708     {
709         /* One of three options:
710          * 1: Do the same as we do with NPOT and scale the texture, (any
711          *    texture ops would require the texture to be scaled which is
712          *    potentially slow)
713          * 2: Set the texture to the maximum size (bad idea).
714          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
715          * 4: Create the surface, but allow it to be used only for DirectDraw
716          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
717          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
718          *    the render target. */
719         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
720         {
721             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
722             return WINED3DERR_NOTAVAILABLE;
723         }
724
725         /* We should never use this surface in combination with OpenGL! */
726         TRACE("Creating an oversized surface: %ux%u.\n",
727                 surface->pow2Width, surface->pow2Height);
728     }
729     else
730     {
731         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
732          * and EXT_PALETTED_TEXTURE is used in combination with texture
733          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
734          * EXT_PALETTED_TEXTURE doesn't work in combination with
735          * ARB_TEXTURE_RECTANGLE. */
736         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
737                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
738                 && gl_info->supported[EXT_PALETTED_TEXTURE]
739                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
740         {
741             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
742             surface->pow2Width = surface->resource.width;
743             surface->pow2Height = surface->resource.height;
744             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
745         }
746     }
747
748     switch (wined3d_settings.offscreen_rendering_mode)
749     {
750         case ORM_FBO:
751             surface->get_drawable_size = get_drawable_size_fbo;
752             break;
753
754         case ORM_BACKBUFFER:
755             surface->get_drawable_size = get_drawable_size_backbuffer;
756             break;
757
758         default:
759             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
760             return WINED3DERR_INVALIDCALL;
761     }
762
763     surface->flags |= SFLAG_INSYSMEM;
764
765     return WINED3D_OK;
766 }
767
768 static void surface_realize_palette(struct wined3d_surface *surface)
769 {
770     struct wined3d_palette *palette = surface->palette;
771
772     TRACE("surface %p.\n", surface);
773
774     if (!palette) return;
775
776     if (surface->resource.format->id == WINED3DFMT_P8_UINT
777             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
778     {
779         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
780         {
781             /* Make sure the texture is up to date. This call doesn't do
782              * anything if the texture is already up to date. */
783             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
784
785             /* We want to force a palette refresh, so mark the drawable as not being up to date */
786             if (!surface_is_offscreen(surface))
787                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
788         }
789         else
790         {
791             if (!(surface->flags & SFLAG_INSYSMEM))
792             {
793                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
794                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
795             }
796             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
797         }
798     }
799
800     if (surface->flags & SFLAG_DIBSECTION)
801     {
802         RGBQUAD col[256];
803         unsigned int i;
804
805         TRACE("Updating the DC's palette.\n");
806
807         for (i = 0; i < 256; ++i)
808         {
809             col[i].rgbRed   = palette->palents[i].peRed;
810             col[i].rgbGreen = palette->palents[i].peGreen;
811             col[i].rgbBlue  = palette->palents[i].peBlue;
812             col[i].rgbReserved = 0;
813         }
814         SetDIBColorTable(surface->hDC, 0, 256, col);
815     }
816
817     /* Propagate the changes to the drawable when we have a palette. */
818     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
819         surface_load_location(surface, surface->draw_binding, NULL);
820 }
821
822 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
823 {
824     HRESULT hr;
825
826     /* If there's no destination surface there is nothing to do. */
827     if (!surface->overlay_dest)
828         return WINED3D_OK;
829
830     /* Blt calls ModifyLocation on the dest surface, which in turn calls
831      * DrawOverlay to update the overlay. Prevent an endless recursion. */
832     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
833         return WINED3D_OK;
834
835     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
836     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
837             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
838     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
839
840     return hr;
841 }
842
843 static void surface_preload(struct wined3d_surface *surface)
844 {
845     TRACE("surface %p.\n", surface);
846
847     surface_internal_preload(surface, SRGB_ANY);
848 }
849
850 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
851 {
852     struct wined3d_device *device = surface->resource.device;
853     const RECT *pass_rect = rect;
854
855     TRACE("surface %p, rect %s, flags %#x.\n",
856             surface, wine_dbgstr_rect(rect), flags);
857
858     if (flags & WINED3DLOCK_DISCARD)
859     {
860         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
861         surface_prepare_system_memory(surface);
862         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
863     }
864     else
865     {
866         /* surface_load_location() does not check if the rectangle specifies
867          * the full surface. Most callers don't need that, so do it here. */
868         if (rect && !rect->top && !rect->left
869                 && rect->right == surface->resource.width
870                 && rect->bottom == surface->resource.height)
871             pass_rect = NULL;
872
873         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
874                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
875                 || surface == device->fb.render_targets[0])))
876             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
877     }
878
879     if (surface->flags & SFLAG_PBO)
880     {
881         const struct wined3d_gl_info *gl_info;
882         struct wined3d_context *context;
883
884         context = context_acquire(device, NULL);
885         gl_info = context->gl_info;
886
887         ENTER_GL();
888         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
889         checkGLcall("glBindBufferARB");
890
891         /* This shouldn't happen but could occur if some other function
892          * didn't handle the PBO properly. */
893         if (surface->resource.allocatedMemory)
894             ERR("The surface already has PBO memory allocated.\n");
895
896         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
897         checkGLcall("glMapBufferARB");
898
899         /* Make sure the PBO isn't set anymore in order not to break non-PBO
900          * calls. */
901         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
902         checkGLcall("glBindBufferARB");
903
904         LEAVE_GL();
905         context_release(context);
906     }
907
908     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
909     {
910         if (!rect)
911             surface_add_dirty_rect(surface, NULL);
912         else
913         {
914             WINED3DBOX b;
915
916             b.Left = rect->left;
917             b.Top = rect->top;
918             b.Right = rect->right;
919             b.Bottom = rect->bottom;
920             b.Front = 0;
921             b.Back = 1;
922             surface_add_dirty_rect(surface, &b);
923         }
924     }
925 }
926
927 static void surface_unmap(struct wined3d_surface *surface)
928 {
929     struct wined3d_device *device = surface->resource.device;
930     BOOL fullsurface;
931
932     TRACE("surface %p.\n", surface);
933
934     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
935
936     if (surface->flags & SFLAG_PBO)
937     {
938         const struct wined3d_gl_info *gl_info;
939         struct wined3d_context *context;
940
941         TRACE("Freeing PBO memory.\n");
942
943         context = context_acquire(device, NULL);
944         gl_info = context->gl_info;
945
946         ENTER_GL();
947         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
948         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
949         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
950         checkGLcall("glUnmapBufferARB");
951         LEAVE_GL();
952         context_release(context);
953
954         surface->resource.allocatedMemory = NULL;
955     }
956
957     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
958
959     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
960     {
961         TRACE("Not dirtified, nothing to do.\n");
962         goto done;
963     }
964
965     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
966             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
967     {
968         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
969         {
970             static BOOL warned = FALSE;
971             if (!warned)
972             {
973                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
974                 warned = TRUE;
975             }
976             goto done;
977         }
978
979         if (!surface->dirtyRect.left && !surface->dirtyRect.top
980                 && surface->dirtyRect.right == surface->resource.width
981                 && surface->dirtyRect.bottom == surface->resource.height)
982         {
983             fullsurface = TRUE;
984         }
985         else
986         {
987             /* TODO: Proper partial rectangle tracking. */
988             fullsurface = FALSE;
989             surface->flags |= SFLAG_INSYSMEM;
990         }
991
992         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
993
994         /* Partial rectangle tracking is not commonly implemented, it is only
995          * done for render targets. INSYSMEM was set before to tell
996          * surface_load_location() where to read the rectangle from.
997          * Indrawable is set because all modifications from the partial
998          * sysmem copy are written back to the drawable, thus the surface is
999          * merged again in the drawable. The sysmem copy is not fully up to
1000          * date because only a subrectangle was read in Map(). */
1001         if (!fullsurface)
1002         {
1003             surface_modify_location(surface, SFLAG_INDRAWABLE, TRUE);
1004             surface_evict_sysmem(surface);
1005         }
1006
1007         surface->dirtyRect.left = surface->resource.width;
1008         surface->dirtyRect.top = surface->resource.height;
1009         surface->dirtyRect.right = 0;
1010         surface->dirtyRect.bottom = 0;
1011     }
1012     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1013     {
1014         FIXME("Depth / stencil buffer locking is not implemented.\n");
1015     }
1016
1017 done:
1018     /* Overlays have to be redrawn manually after changes with the GL implementation */
1019     if (surface->overlay_dest)
1020         surface->surface_ops->surface_draw_overlay(surface);
1021 }
1022
1023 static HRESULT surface_getdc(struct wined3d_surface *surface)
1024 {
1025     WINED3DLOCKED_RECT lock;
1026     HRESULT hr;
1027
1028     TRACE("surface %p.\n", surface);
1029
1030     /* Create a DIB section if there isn't a dc yet. */
1031     if (!surface->hDC)
1032     {
1033         if (surface->flags & SFLAG_CLIENT)
1034         {
1035             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1036             surface_release_client_storage(surface);
1037         }
1038         hr = surface_create_dib_section(surface);
1039         if (FAILED(hr))
1040             return WINED3DERR_INVALIDCALL;
1041
1042         /* Use the DIB section from now on if we are not using a PBO. */
1043         if (!(surface->flags & SFLAG_PBO))
1044             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1045     }
1046
1047     /* Map the surface. */
1048     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1049     if (FAILED(hr))
1050         ERR("Map failed, hr %#x.\n", hr);
1051
1052     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1053      * activates the allocatedMemory. */
1054     if (surface->flags & SFLAG_PBO)
1055         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
1056
1057     return hr;
1058 }
1059
1060 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1061 {
1062     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1063         return FALSE;
1064     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1065         return FALSE;
1066     return TRUE;
1067 }
1068
1069 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1070         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1071 {
1072     const struct wined3d_gl_info *gl_info;
1073     struct wined3d_context *context;
1074     DWORD src_mask, dst_mask;
1075     GLbitfield gl_mask;
1076
1077     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1078             device, src_surface, wine_dbgstr_rect(src_rect),
1079             dst_surface, wine_dbgstr_rect(dst_rect));
1080
1081     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1082     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1083
1084     if (src_mask != dst_mask)
1085     {
1086         ERR("Incompatible formats %s and %s.\n",
1087                 debug_d3dformat(src_surface->resource.format->id),
1088                 debug_d3dformat(dst_surface->resource.format->id));
1089         return;
1090     }
1091
1092     if (!src_mask)
1093     {
1094         ERR("Not a depth / stencil format: %s.\n",
1095                 debug_d3dformat(src_surface->resource.format->id));
1096         return;
1097     }
1098
1099     gl_mask = 0;
1100     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1101         gl_mask |= GL_DEPTH_BUFFER_BIT;
1102     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1103         gl_mask |= GL_STENCIL_BUFFER_BIT;
1104
1105     /* Make sure the locations are up-to-date. Loading the destination
1106      * surface isn't required if the entire surface is overwritten. */
1107     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1108     if (!surface_is_full_rect(dst_surface, dst_rect))
1109         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1110
1111     context = context_acquire(device, NULL);
1112     if (!context->valid)
1113     {
1114         context_release(context);
1115         WARN("Invalid context, skipping blit.\n");
1116         return;
1117     }
1118
1119     gl_info = context->gl_info;
1120
1121     ENTER_GL();
1122
1123     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1124     glReadBuffer(GL_NONE);
1125     checkGLcall("glReadBuffer()");
1126     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1127
1128     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1129     context_set_draw_buffer(context, GL_NONE);
1130     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1131
1132     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1133     {
1134         glDepthMask(GL_TRUE);
1135         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1136     }
1137     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1138     {
1139         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1140         {
1141             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1142             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1143         }
1144         glStencilMask(~0U);
1145         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1146     }
1147
1148     glDisable(GL_SCISSOR_TEST);
1149     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1150
1151     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1152             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1153     checkGLcall("glBlitFramebuffer()");
1154
1155     LEAVE_GL();
1156
1157     if (wined3d_settings.strict_draw_ordering)
1158         wglFlush(); /* Flush to ensure ordering across contexts. */
1159
1160     context_release(context);
1161 }
1162
1163 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1164  * Depth / stencil is not supported. */
1165 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1166         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1167         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1168 {
1169     const struct wined3d_gl_info *gl_info;
1170     struct wined3d_context *context;
1171     RECT src_rect, dst_rect;
1172     GLenum gl_filter;
1173     GLenum buffer;
1174
1175     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1176     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1177             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1178     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1179             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1180
1181     src_rect = *src_rect_in;
1182     dst_rect = *dst_rect_in;
1183
1184     switch (filter)
1185     {
1186         case WINED3DTEXF_LINEAR:
1187             gl_filter = GL_LINEAR;
1188             break;
1189
1190         default:
1191             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1192         case WINED3DTEXF_NONE:
1193         case WINED3DTEXF_POINT:
1194             gl_filter = GL_NEAREST;
1195             break;
1196     }
1197
1198     /* Resolve the source surface first if needed. */
1199     if (src_location == SFLAG_INRB_MULTISAMPLE
1200             && (src_surface->resource.format->id != dst_surface->resource.format->id
1201                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1202                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1203         src_location = SFLAG_INRB_RESOLVED;
1204
1205     /* Make sure the locations are up-to-date. Loading the destination
1206      * surface isn't required if the entire surface is overwritten. (And is
1207      * in fact harmful if we're being called by surface_load_location() with
1208      * the purpose of loading the destination surface.) */
1209     surface_load_location(src_surface, src_location, NULL);
1210     if (!surface_is_full_rect(dst_surface, &dst_rect))
1211         surface_load_location(dst_surface, dst_location, NULL);
1212
1213     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1214     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1215     else context = context_acquire(device, NULL);
1216
1217     if (!context->valid)
1218     {
1219         context_release(context);
1220         WARN("Invalid context, skipping blit.\n");
1221         return;
1222     }
1223
1224     gl_info = context->gl_info;
1225
1226     if (src_location == SFLAG_INDRAWABLE)
1227     {
1228         TRACE("Source surface %p is onscreen.\n", src_surface);
1229         buffer = surface_get_gl_buffer(src_surface);
1230         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1231     }
1232     else
1233     {
1234         TRACE("Source surface %p is offscreen.\n", src_surface);
1235         buffer = GL_COLOR_ATTACHMENT0;
1236     }
1237
1238     ENTER_GL();
1239     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1240     glReadBuffer(buffer);
1241     checkGLcall("glReadBuffer()");
1242     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1243     LEAVE_GL();
1244
1245     if (dst_location == SFLAG_INDRAWABLE)
1246     {
1247         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1248         buffer = surface_get_gl_buffer(dst_surface);
1249         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1250     }
1251     else
1252     {
1253         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1254         buffer = GL_COLOR_ATTACHMENT0;
1255     }
1256
1257     ENTER_GL();
1258     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1259     context_set_draw_buffer(context, buffer);
1260     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1261     context_invalidate_state(context, STATE_FRAMEBUFFER);
1262
1263     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1264     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1265     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1266     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1267     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1268
1269     glDisable(GL_SCISSOR_TEST);
1270     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1271
1272     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1273             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1274     checkGLcall("glBlitFramebuffer()");
1275
1276     LEAVE_GL();
1277
1278     if (wined3d_settings.strict_draw_ordering
1279             || (dst_location == SFLAG_INDRAWABLE
1280             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1281         wglFlush();
1282
1283     context_release(context);
1284 }
1285
1286 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1287         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1288         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1289 {
1290     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1291         return FALSE;
1292
1293     /* Source and/or destination need to be on the GL side */
1294     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1295         return FALSE;
1296
1297     switch (blit_op)
1298     {
1299         case WINED3D_BLIT_OP_COLOR_BLIT:
1300             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1301                 return FALSE;
1302             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1303                 return FALSE;
1304             break;
1305
1306         case WINED3D_BLIT_OP_DEPTH_BLIT:
1307             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1308                 return FALSE;
1309             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1310                 return FALSE;
1311             break;
1312
1313         default:
1314             return FALSE;
1315     }
1316
1317     if (!(src_format->id == dst_format->id
1318             || (is_identity_fixup(src_format->color_fixup)
1319             && is_identity_fixup(dst_format->color_fixup))))
1320         return FALSE;
1321
1322     return TRUE;
1323 }
1324
1325 /* This function checks if the primary render target uses the 8bit paletted format. */
1326 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1327 {
1328     if (device->fb.render_targets && device->fb.render_targets[0])
1329     {
1330         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1331         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1332                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1333             return TRUE;
1334     }
1335     return FALSE;
1336 }
1337
1338 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1339         DWORD color, WINED3DCOLORVALUE *float_color)
1340 {
1341     const struct wined3d_format *format = surface->resource.format;
1342     const struct wined3d_device *device = surface->resource.device;
1343
1344     switch (format->id)
1345     {
1346         case WINED3DFMT_P8_UINT:
1347             if (surface->palette)
1348             {
1349                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1350                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1351                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1352             }
1353             else
1354             {
1355                 float_color->r = 0.0f;
1356                 float_color->g = 0.0f;
1357                 float_color->b = 0.0f;
1358             }
1359             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1360             break;
1361
1362         case WINED3DFMT_B5G6R5_UNORM:
1363             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1364             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1365             float_color->b = (color & 0x1f) / 31.0f;
1366             float_color->a = 1.0f;
1367             break;
1368
1369         case WINED3DFMT_B8G8R8_UNORM:
1370         case WINED3DFMT_B8G8R8X8_UNORM:
1371             float_color->r = D3DCOLOR_R(color);
1372             float_color->g = D3DCOLOR_G(color);
1373             float_color->b = D3DCOLOR_B(color);
1374             float_color->a = 1.0f;
1375             break;
1376
1377         case WINED3DFMT_B8G8R8A8_UNORM:
1378             float_color->r = D3DCOLOR_R(color);
1379             float_color->g = D3DCOLOR_G(color);
1380             float_color->b = D3DCOLOR_B(color);
1381             float_color->a = D3DCOLOR_A(color);
1382             break;
1383
1384         default:
1385             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1386             return FALSE;
1387     }
1388
1389     return TRUE;
1390 }
1391
1392 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1393 {
1394     const struct wined3d_format *format = surface->resource.format;
1395
1396     switch (format->id)
1397     {
1398         case WINED3DFMT_S1_UINT_D15_UNORM:
1399             *float_depth = depth / (float)0x00007fff;
1400             break;
1401
1402         case WINED3DFMT_D16_UNORM:
1403             *float_depth = depth / (float)0x0000ffff;
1404             break;
1405
1406         case WINED3DFMT_D24_UNORM_S8_UINT:
1407         case WINED3DFMT_X8D24_UNORM:
1408             *float_depth = depth / (float)0x00ffffff;
1409             break;
1410
1411         case WINED3DFMT_D32_UNORM:
1412             *float_depth = depth / (float)0xffffffff;
1413             break;
1414
1415         default:
1416             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1417             return FALSE;
1418     }
1419
1420     return TRUE;
1421 }
1422
1423 /* Do not call while under the GL lock. */
1424 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1425 {
1426     const struct wined3d_resource *resource = &surface->resource;
1427     struct wined3d_device *device = resource->device;
1428     const struct blit_shader *blitter;
1429
1430     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1431             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1432     if (!blitter)
1433     {
1434         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1435         return WINED3DERR_INVALIDCALL;
1436     }
1437
1438     return blitter->depth_fill(device, surface, rect, depth);
1439 }
1440
1441 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1442         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1443 {
1444     struct wined3d_device *device = src_surface->resource.device;
1445
1446     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1447             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1448             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1449         return WINED3DERR_INVALIDCALL;
1450
1451     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1452
1453     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1454             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1455     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1456
1457     return WINED3D_OK;
1458 }
1459
1460 /* Do not call while under the GL lock. */
1461 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1462         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1463         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1464 {
1465     const struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1466     struct wined3d_device *device = dst_surface->resource.device;
1467     DWORD src_ds_flags, dst_ds_flags;
1468     RECT src_rect, dst_rect;
1469
1470     static const DWORD simple_blit = WINEDDBLT_ASYNC
1471             | WINEDDBLT_COLORFILL
1472             | WINEDDBLT_WAIT
1473             | WINEDDBLT_DEPTHFILL
1474             | WINEDDBLT_DONOTWAIT;
1475
1476     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1477             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1478             flags, fx, debug_d3dtexturefiltertype(filter));
1479     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1480
1481     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1482     {
1483         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1484         return WINEDDERR_SURFACEBUSY;
1485     }
1486
1487     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1488
1489     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1490             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1491             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1492             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1493             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1494     {
1495         /* The destination rect can be out of bounds on the condition
1496          * that a clipper is set for the surface. */
1497         if (dst_surface->clipper)
1498             FIXME("Blit clipping not implemented.\n");
1499         else
1500             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1501         return WINEDDERR_INVALIDRECT;
1502     }
1503
1504     if (src_surface)
1505     {
1506         surface_get_rect(src_surface, src_rect_in, &src_rect);
1507
1508         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1509                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1510                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1511                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1512                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1513         {
1514             WARN("Application gave us bad source rectangle for Blt.\n");
1515             return WINEDDERR_INVALIDRECT;
1516         }
1517     }
1518     else
1519     {
1520         memset(&src_rect, 0, sizeof(src_rect));
1521     }
1522
1523     if (!fx || !(fx->dwDDFX))
1524         flags &= ~WINEDDBLT_DDFX;
1525
1526     if (flags & WINEDDBLT_WAIT)
1527         flags &= ~WINEDDBLT_WAIT;
1528
1529     if (flags & WINEDDBLT_ASYNC)
1530     {
1531         static unsigned int once;
1532
1533         if (!once++)
1534             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1535         flags &= ~WINEDDBLT_ASYNC;
1536     }
1537
1538     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1539     if (flags & WINEDDBLT_DONOTWAIT)
1540     {
1541         static unsigned int once;
1542
1543         if (!once++)
1544             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1545         flags &= ~WINEDDBLT_DONOTWAIT;
1546     }
1547
1548     if (!device->d3d_initialized)
1549     {
1550         WARN("D3D not initialized, using fallback.\n");
1551         goto cpu;
1552     }
1553
1554     if (flags & ~simple_blit)
1555     {
1556         WARN("Using fallback for complex blit (%#x).\n", flags);
1557         goto fallback;
1558     }
1559
1560     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1561         src_swapchain = src_surface->container.u.swapchain;
1562     else
1563         src_swapchain = NULL;
1564
1565     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1566         dst_swapchain = dst_surface->container.u.swapchain;
1567     else
1568         dst_swapchain = NULL;
1569
1570     /* This isn't strictly needed. FBO blits for example could deal with
1571      * cross-swapchain blits by first downloading the source to a texture
1572      * before switching to the destination context. We just have this here to
1573      * not have to deal with the issue, since cross-swapchain blits should be
1574      * rare. */
1575     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1576     {
1577         FIXME("Using fallback for cross-swapchain blit.\n");
1578         goto fallback;
1579     }
1580
1581     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1582     if (src_surface)
1583         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1584     else
1585         src_ds_flags = 0;
1586
1587     if (src_ds_flags || dst_ds_flags)
1588     {
1589         if (flags & WINEDDBLT_DEPTHFILL)
1590         {
1591             float depth;
1592
1593             TRACE("Depth fill.\n");
1594
1595             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1596                 return WINED3DERR_INVALIDCALL;
1597
1598             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1599                 return WINED3D_OK;
1600         }
1601         else
1602         {
1603             /* Accessing depth / stencil surfaces is supposed to fail while in
1604              * a scene, except for fills, which seem to work. */
1605             if (device->inScene)
1606             {
1607                 WARN("Rejecting depth / stencil access while in scene.\n");
1608                 return WINED3DERR_INVALIDCALL;
1609             }
1610
1611             if (src_ds_flags != dst_ds_flags)
1612             {
1613                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1614                 return WINED3DERR_INVALIDCALL;
1615             }
1616
1617             if (src_rect.top || src_rect.left
1618                     || src_rect.bottom != src_surface->resource.height
1619                     || src_rect.right != src_surface->resource.width)
1620             {
1621                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1622                         wine_dbgstr_rect(&src_rect));
1623                 return WINED3DERR_INVALIDCALL;
1624             }
1625
1626             if (dst_rect.top || dst_rect.left
1627                     || dst_rect.bottom != dst_surface->resource.height
1628                     || dst_rect.right != dst_surface->resource.width)
1629             {
1630                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1631                         wine_dbgstr_rect(&src_rect));
1632                 return WINED3DERR_INVALIDCALL;
1633             }
1634
1635             if (src_surface->resource.height != dst_surface->resource.height
1636                     || src_surface->resource.width != dst_surface->resource.width)
1637             {
1638                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1639                 return WINED3DERR_INVALIDCALL;
1640             }
1641
1642             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1643                 return WINED3D_OK;
1644         }
1645     }
1646     else
1647     {
1648         if (flags & WINEDDBLT_COLORFILL)
1649         {
1650             WINED3DCOLORVALUE color;
1651
1652             TRACE("Color fill.\n");
1653
1654             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1655                 goto fallback;
1656
1657             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1658                 return WINED3D_OK;
1659         }
1660         else
1661         {
1662             TRACE("Color blit.\n");
1663
1664             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1665                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1666                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1667             {
1668                 TRACE("Using FBO blit.\n");
1669
1670                 surface_blt_fbo(device, filter,
1671                         src_surface, src_surface->draw_binding, &src_rect,
1672                         dst_surface, dst_surface->draw_binding, &dst_rect);
1673                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1674                 return WINED3D_OK;
1675             }
1676
1677             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1678                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1679                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1680             {
1681                 TRACE("Using arbfp blit.\n");
1682
1683                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1684                     return WINED3D_OK;
1685             }
1686         }
1687     }
1688
1689 fallback:
1690
1691     /* Special cases for render targets. */
1692     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1693             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1694     {
1695         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1696                 src_surface, &src_rect, flags, fx, filter)))
1697             return WINED3D_OK;
1698     }
1699
1700 cpu:
1701
1702     /* For the rest call the X11 surface implementation. For render targets
1703      * this should be implemented OpenGL accelerated in BltOverride, other
1704      * blits are rather rare. */
1705     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1706 }
1707
1708 /* Do not call while under the GL lock. */
1709 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1710         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1711 {
1712     RECT src_rect, dst_rect;
1713     DWORD flags = 0;
1714
1715     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect_in %s, trans %#x.\n",
1716             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1717
1718     surface_get_rect(src_surface, src_rect_in, &src_rect);
1719
1720     dst_rect.left = dst_x;
1721     dst_rect.top = dst_y;
1722     dst_rect.right = dst_x + src_rect.right - src_rect.left;
1723     dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1724
1725     if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1726         flags |= WINEDDBLT_KEYSRC;
1727     if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1728         flags |= WINEDDBLT_KEYDEST;
1729     if (trans & WINEDDBLTFAST_WAIT)
1730         flags |= WINEDDBLT_WAIT;
1731     if (trans & WINEDDBLTFAST_DONOTWAIT)
1732         flags |= WINEDDBLT_DONOTWAIT;
1733
1734     return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT);
1735 }
1736
1737 /* Context activation is done by the caller. */
1738 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1739 {
1740     if (!surface->resource.heapMemory)
1741     {
1742         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1743         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1744                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1745     }
1746
1747     ENTER_GL();
1748     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1749     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1750     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1751             surface->resource.size, surface->resource.allocatedMemory));
1752     checkGLcall("glGetBufferSubDataARB");
1753     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1754     checkGLcall("glDeleteBuffersARB");
1755     LEAVE_GL();
1756
1757     surface->pbo = 0;
1758     surface->flags &= ~SFLAG_PBO;
1759 }
1760
1761 /* Do not call while under the GL lock. */
1762 static void surface_unload(struct wined3d_resource *resource)
1763 {
1764     struct wined3d_surface *surface = surface_from_resource(resource);
1765     struct wined3d_renderbuffer_entry *entry, *entry2;
1766     struct wined3d_device *device = resource->device;
1767     const struct wined3d_gl_info *gl_info;
1768     struct wined3d_context *context;
1769
1770     TRACE("surface %p.\n", surface);
1771
1772     if (resource->pool == WINED3DPOOL_DEFAULT)
1773     {
1774         /* Default pool resources are supposed to be destroyed before Reset is called.
1775          * Implicit resources stay however. So this means we have an implicit render target
1776          * or depth stencil. The content may be destroyed, but we still have to tear down
1777          * opengl resources, so we cannot leave early.
1778          *
1779          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1780          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1781          * or the depth stencil into an FBO the texture or render buffer will be removed
1782          * and all flags get lost
1783          */
1784         surface_init_sysmem(surface);
1785     }
1786     else
1787     {
1788         /* Load the surface into system memory */
1789         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1790         surface_modify_location(surface, surface->draw_binding, FALSE);
1791     }
1792     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1793     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1794     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1795
1796     context = context_acquire(device, NULL);
1797     gl_info = context->gl_info;
1798
1799     /* Destroy PBOs, but load them into real sysmem before */
1800     if (surface->flags & SFLAG_PBO)
1801         surface_remove_pbo(surface, gl_info);
1802
1803     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1804      * all application-created targets the application has to release the surface
1805      * before calling _Reset
1806      */
1807     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1808     {
1809         ENTER_GL();
1810         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1811         LEAVE_GL();
1812         list_remove(&entry->entry);
1813         HeapFree(GetProcessHeap(), 0, entry);
1814     }
1815     list_init(&surface->renderbuffers);
1816     surface->current_renderbuffer = NULL;
1817
1818     ENTER_GL();
1819
1820     /* If we're in a texture, the texture name belongs to the texture.
1821      * Otherwise, destroy it. */
1822     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1823     {
1824         glDeleteTextures(1, &surface->texture_name);
1825         surface->texture_name = 0;
1826         glDeleteTextures(1, &surface->texture_name_srgb);
1827         surface->texture_name_srgb = 0;
1828     }
1829     if (surface->rb_multisample)
1830     {
1831         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1832         surface->rb_multisample = 0;
1833     }
1834     if (surface->rb_resolved)
1835     {
1836         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1837         surface->rb_resolved = 0;
1838     }
1839
1840     LEAVE_GL();
1841
1842     context_release(context);
1843
1844     resource_unload(resource);
1845 }
1846
/* Resource callbacks for surfaces. The table is initialised positionally and
 * its only entry is surface_unload(). */
1847 static const struct wined3d_resource_ops surface_resource_ops =
1848 {
1849     surface_unload,
1850 };
1851
/* Surface operations table built from the un-prefixed surface_*()
 * implementations (gdi_surface_ops lists the GDI counterparts).
 *
 * The initialiser is positional, so the order has to match the field order
 * of struct wined3d_surface_ops. The field names are not visible here; the
 * entries line up one-to-one with those of gdi_surface_ops. */
1852 static const struct wined3d_surface_ops surface_ops =
1853 {
1854     surface_private_setup,
1855     surface_cleanup,
1856     surface_realize_palette,
1857     surface_draw_overlay,
1858     surface_preload,
1859     surface_map,
1860     surface_unmap,
1861     surface_getdc,
1862 };
1863
1864 /*****************************************************************************
1865  * Initializes the GDI surface, aka creates the DIB section we render to
1866  * The DIB section creation is done by calling GetDC, which will create the
1867  * section and releasing the dc to allow the app to use it. The dib section
1868  * will stay until the surface is released
1869  *
1870  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1871  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1872  * avoid confusion in the shared surface code.
1873  *
1874  * Returns:
1875  *  WINED3D_OK on success
1876  *  The return values of called methods on failure
1877  *
1878  *****************************************************************************/
1879 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1880 {
1881     HRESULT hr;
1882
1883     TRACE("surface %p.\n", surface);
1884
1885     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1886     {
1887         ERR("Overlays not yet supported by GDI surfaces.\n");
1888         return WINED3DERR_INVALIDCALL;
1889     }
1890
1891     /* Sysmem textures have memory already allocated - release it,
1892      * this avoids an unnecessary memcpy. */
1893     hr = surface_create_dib_section(surface);
1894     if (SUCCEEDED(hr))
1895     {
1896         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1897         surface->resource.heapMemory = NULL;
1898         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1899     }
1900
1901     /* We don't mind the nonpow2 stuff in GDI. */
1902     surface->pow2Width = surface->resource.width;
1903     surface->pow2Height = surface->resource.height;
1904
1905     return WINED3D_OK;
1906 }
1907
1908 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1909 {
1910     TRACE("surface %p.\n", surface);
1911
1912     if (surface->flags & SFLAG_DIBSECTION)
1913     {
1914         /* Release the DC. */
1915         SelectObject(surface->hDC, surface->dib.holdbitmap);
1916         DeleteDC(surface->hDC);
1917         /* Release the DIB section. */
1918         DeleteObject(surface->dib.DIBsection);
1919         surface->dib.bitmap_data = NULL;
1920         surface->resource.allocatedMemory = NULL;
1921     }
1922
1923     if (surface->flags & SFLAG_USERPTR)
1924         wined3d_surface_set_mem(surface, NULL);
1925     if (surface->overlay_dest)
1926         list_remove(&surface->overlay_entry);
1927
1928     HeapFree(GetProcessHeap(), 0, surface->palette9);
1929
1930     resource_cleanup(&surface->resource);
1931 }
1932
1933 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1934 {
1935     struct wined3d_palette *palette = surface->palette;
1936
1937     TRACE("surface %p.\n", surface);
1938
1939     if (!palette) return;
1940
1941     if (surface->flags & SFLAG_DIBSECTION)
1942     {
1943         RGBQUAD col[256];
1944         unsigned int i;
1945
1946         TRACE("Updating the DC's palette.\n");
1947
1948         for (i = 0; i < 256; ++i)
1949         {
1950             col[i].rgbRed = palette->palents[i].peRed;
1951             col[i].rgbGreen = palette->palents[i].peGreen;
1952             col[i].rgbBlue = palette->palents[i].peBlue;
1953             col[i].rgbReserved = 0;
1954         }
1955         SetDIBColorTable(surface->hDC, 0, 256, col);
1956     }
1957
1958     /* Update the image because of the palette change. Some games like e.g.
1959      * Red Alert call SetEntries a lot to implement fading. */
1960     /* Tell the swapchain to update the screen. */
1961     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1962     {
1963         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1964         if (surface == swapchain->front_buffer)
1965         {
1966             x11_copy_to_screen(swapchain, NULL);
1967         }
1968     }
1969 }
1970
1971 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
1972 {
1973     FIXME("GDI surfaces can't draw overlays yet.\n");
1974     return E_FAIL;
1975 }
1976
/* Preload callback for GDI surfaces. Preloading is not supported for them,
 * so the call is traced and reported as an error; nothing else happens. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);
    ERR("Preloading GDI surfaces is not supported.\n");
}
1983
1984 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1985 {
1986     TRACE("surface %p, rect %s, flags %#x.\n",
1987             surface, wine_dbgstr_rect(rect), flags);
1988
1989     if (!surface->resource.allocatedMemory)
1990     {
1991         /* This happens on gdi surfaces if the application set a user pointer
1992          * and resets it. Recreate the DIB section. */
1993         surface_create_dib_section(surface);
1994         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1995     }
1996 }
1997
1998 static void gdi_surface_unmap(struct wined3d_surface *surface)
1999 {
2000     TRACE("surface %p.\n", surface);
2001
2002     /* Tell the swapchain to update the screen. */
2003     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2004     {
2005         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2006         if (surface == swapchain->front_buffer)
2007         {
2008             x11_copy_to_screen(swapchain, &surface->lockedRect);
2009         }
2010     }
2011
2012     memset(&surface->lockedRect, 0, sizeof(RECT));
2013 }
2014
2015 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
2016 {
2017     WINED3DLOCKED_RECT lock;
2018     HRESULT hr;
2019
2020     TRACE("surface %p.\n", surface);
2021
2022     /* Should have a DIB section already. */
2023     if (!(surface->flags & SFLAG_DIBSECTION))
2024     {
2025         WARN("DC not supported on this surface\n");
2026         return WINED3DERR_INVALIDCALL;
2027     }
2028
2029     /* Map the surface. */
2030     hr = wined3d_surface_map(surface, &lock, NULL, 0);
2031     if (FAILED(hr))
2032         ERR("Map failed, hr %#x.\n", hr);
2033
2034     return hr;
2035 }
2036
/* Surface operations table for GDI surfaces, built from the GDI
 * implementations defined above.
 *
 * The initialiser is positional, so the order has to match the field order
 * of struct wined3d_surface_ops. The field names are not visible here; the
 * entries line up one-to-one with those of surface_ops. */
2037 static const struct wined3d_surface_ops gdi_surface_ops =
2038 {
2039     gdi_surface_private_setup,
2040     surface_gdi_cleanup,
2041     gdi_surface_realize_palette,
2042     gdi_surface_draw_overlay,
2043     gdi_surface_preload,
2044     gdi_surface_map,
2045     gdi_surface_unmap,
2046     gdi_surface_getdc,
2047 };
2048
2049 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2050 {
2051     GLuint *name;
2052     DWORD flag;
2053
2054     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2055
2056     if(srgb)
2057     {
2058         name = &surface->texture_name_srgb;
2059         flag = SFLAG_INSRGBTEX;
2060     }
2061     else
2062     {
2063         name = &surface->texture_name;
2064         flag = SFLAG_INTEXTURE;
2065     }
2066
2067     if (!*name && new_name)
2068     {
2069         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2070          * surface has no texture name yet. See if we can get rid of this. */
2071         if (surface->flags & flag)
2072             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2073         surface_modify_location(surface, flag, FALSE);
2074     }
2075
2076     *name = new_name;
2077     surface_force_reload(surface);
2078 }
2079
2080 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2081 {
2082     TRACE("surface %p, target %#x.\n", surface, target);
2083
2084     if (surface->texture_target != target)
2085     {
2086         if (target == GL_TEXTURE_RECTANGLE_ARB)
2087         {
2088             surface->flags &= ~SFLAG_NORMCOORD;
2089         }
2090         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2091         {
2092             surface->flags |= SFLAG_NORMCOORD;
2093         }
2094     }
2095     surface->texture_target = target;
2096     surface_force_reload(surface);
2097 }
2098
2099 /* Context activation is done by the caller. */
2100 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2101 {
2102     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2103
2104     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2105     {
2106         struct wined3d_texture *texture = surface->container.u.texture;
2107
2108         TRACE("Passing to container (%p).\n", texture);
2109         texture->texture_ops->texture_bind(texture, context, srgb);
2110     }
2111     else
2112     {
2113         if (surface->texture_level)
2114         {
2115             ERR("Standalone surface %p is non-zero texture level %u.\n",
2116                     surface, surface->texture_level);
2117         }
2118
2119         if (srgb)
2120             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2121
2122         ENTER_GL();
2123
2124         if (!surface->texture_name)
2125         {
2126             glGenTextures(1, &surface->texture_name);
2127             checkGLcall("glGenTextures");
2128
2129             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2130
2131             context_bind_texture(context, surface->texture_target, surface->texture_name);
2132             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2133             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2134             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2135             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2136             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2137             checkGLcall("glTexParameteri");
2138         }
2139         else
2140         {
2141             context_bind_texture(context, surface->texture_target, surface->texture_name);
2142         }
2143
2144         LEAVE_GL();
2145     }
2146 }
2147
2148 /* This call just downloads data, the caller is responsible for binding the
2149  * correct texture. */
2150 /* Context activation is done by the caller. */
2151 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2152 {
2153     const struct wined3d_format *format = surface->resource.format;
2154
2155     /* Only support read back of converted P8 surfaces. */
2156     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2157     {
2158         FIXME("Readback conversion not supported for format %s.\n", debug_d3dformat(format->id));
2159         return;
2160     }
2161
2162     ENTER_GL();
2163
2164     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2165     {
2166         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2167                 surface, surface->texture_level, format->glFormat, format->glType,
2168                 surface->resource.allocatedMemory);
2169
2170         if (surface->flags & SFLAG_PBO)
2171         {
2172             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2173             checkGLcall("glBindBufferARB");
2174             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2175             checkGLcall("glGetCompressedTexImageARB");
2176             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2177             checkGLcall("glBindBufferARB");
2178         }
2179         else
2180         {
2181             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2182                     surface->texture_level, surface->resource.allocatedMemory));
2183             checkGLcall("glGetCompressedTexImageARB");
2184         }
2185
2186         LEAVE_GL();
2187     }
2188     else
2189     {
2190         void *mem;
2191         GLenum gl_format = format->glFormat;
2192         GLenum gl_type = format->glType;
2193         int src_pitch = 0;
2194         int dst_pitch = 0;
2195
2196         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2197         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2198         {
2199             gl_format = GL_ALPHA;
2200             gl_type = GL_UNSIGNED_BYTE;
2201         }
2202
2203         if (surface->flags & SFLAG_NONPOW2)
2204         {
2205             unsigned char alignment = surface->resource.device->surface_alignment;
2206             src_pitch = format->byte_count * surface->pow2Width;
2207             dst_pitch = wined3d_surface_get_pitch(surface);
2208             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2209             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2210         }
2211         else
2212         {
2213             mem = surface->resource.allocatedMemory;
2214         }
2215
2216         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2217                 surface, surface->texture_level, gl_format, gl_type, mem);
2218
2219         if (surface->flags & SFLAG_PBO)
2220         {
2221             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2222             checkGLcall("glBindBufferARB");
2223
2224             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2225             checkGLcall("glGetTexImage");
2226
2227             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2228             checkGLcall("glBindBufferARB");
2229         }
2230         else
2231         {
2232             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2233             checkGLcall("glGetTexImage");
2234         }
2235         LEAVE_GL();
2236
2237         if (surface->flags & SFLAG_NONPOW2)
2238         {
2239             const BYTE *src_data;
2240             BYTE *dst_data;
2241             UINT y;
2242             /*
2243              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2244              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2245              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2246              *
2247              * We're doing this...
2248              *
2249              * instead of boxing the texture :
2250              * |<-texture width ->|  -->pow2width|   /\
2251              * |111111111111111111|              |   |
2252              * |222 Texture 222222| boxed empty  | texture height
2253              * |3333 Data 33333333|              |   |
2254              * |444444444444444444|              |   \/
2255              * -----------------------------------   |
2256              * |     boxed  empty | boxed empty  | pow2height
2257              * |                  |              |   \/
2258              * -----------------------------------
2259              *
2260              *
2261              * we're repacking the data to the expected texture width
2262              *
2263              * |<-texture width ->|  -->pow2width|   /\
2264              * |111111111111111111222222222222222|   |
2265              * |222333333333333333333444444444444| texture height
2266              * |444444                           |   |
2267              * |                                 |   \/
2268              * |                                 |   |
2269              * |            empty                | pow2height
2270              * |                                 |   \/
2271              * -----------------------------------
2272              *
2273              * == is the same as
2274              *
2275              * |<-texture width ->|    /\
2276              * |111111111111111111|
2277              * |222222222222222222|texture height
2278              * |333333333333333333|
2279              * |444444444444444444|    \/
2280              * --------------------
2281              *
2282              * this also means that any references to allocatedMemory should work with the data as if were a
2283              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2284              *
2285              * internally the texture is still stored in a boxed format so any references to textureName will
2286              * get a boxed texture with width pow2width and not a texture of width resource.width.
2287              *
2288              * Performance should not be an issue, because applications normally do not lock the surfaces when
2289              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2290              * and doesn't have to be re-read. */
2291             src_data = mem;
2292             dst_data = surface->resource.allocatedMemory;
2293             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2294             for (y = 1; y < surface->resource.height; ++y)
2295             {
2296                 /* skip the first row */
2297                 src_data += src_pitch;
2298                 dst_data += dst_pitch;
2299                 memcpy(dst_data, src_data, dst_pitch);
2300             }
2301
2302             HeapFree(GetProcessHeap(), 0, mem);
2303         }
2304     }
2305
2306     /* Surface has now been downloaded */
2307     surface->flags |= SFLAG_INSYSMEM;
2308 }
2309
2310 /* This call just uploads data, the caller is responsible for binding the
2311  * correct texture. */
2312 /* Context activation is done by the caller. */
2313 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2314         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2315         BOOL srgb, const struct wined3d_bo_address *data)
2316 {
2317     UINT update_w = src_rect->right - src_rect->left;
2318     UINT update_h = src_rect->bottom - src_rect->top;
2319
2320     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2321             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2322             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2323
2324     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2325         update_h *= format->heightscale;
2326
2327     ENTER_GL();
2328
2329     if (data->buffer_object)
2330     {
2331         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2332         checkGLcall("glBindBufferARB");
2333     }
2334
2335     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2336     {
2337         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2338         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2339         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2340         const BYTE *addr = data->addr;
2341         GLenum internal;
2342
2343         addr += (src_rect->top / format->block_height) * src_pitch;
2344         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2345
2346         if (srgb)
2347             internal = format->glGammaInternal;
2348         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2349             internal = format->rtInternal;
2350         else
2351             internal = format->glInternal;
2352
2353         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2354                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2355                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2356
2357         if (row_length == src_pitch)
2358         {
2359             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2360                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2361         }
2362         else
2363         {
2364             UINT row, y;
2365
2366             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2367              * can't use the unpack row length like below. */
2368             for (row = 0, y = dst_point->y; row < row_count; ++row)
2369             {
2370                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2371                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2372                 y += format->block_height;
2373                 addr += src_pitch;
2374             }
2375         }
2376         checkGLcall("glCompressedTexSubImage2DARB");
2377     }
2378     else
2379     {
2380         const BYTE *addr = data->addr;
2381
2382         addr += src_rect->top * src_w * format->byte_count;
2383         addr += src_rect->left * format->byte_count;
2384
2385         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2386                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2387                 update_w, update_h, format->glFormat, format->glType, addr);
2388
2389         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2390         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2391                 update_w, update_h, format->glFormat, format->glType, addr);
2392         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2393         checkGLcall("glTexSubImage2D");
2394     }
2395
2396     if (data->buffer_object)
2397     {
2398         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2399         checkGLcall("glBindBufferARB");
2400     }
2401
2402     LEAVE_GL();
2403
2404     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2405     {
2406         struct wined3d_device *device = surface->resource.device;
2407         unsigned int i;
2408
2409         for (i = 0; i < device->context_count; ++i)
2410         {
2411             context_surface_update(device->contexts[i], surface);
2412         }
2413     }
2414 }
2415
2416 /* This call just allocates the texture, the caller is responsible for binding
2417  * the correct texture. */
2418 /* Context activation is done by the caller. */
2419 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2420         const struct wined3d_format *format, BOOL srgb)
2421 {
2422     BOOL enable_client_storage = FALSE;
2423     GLsizei width = surface->pow2Width;
2424     GLsizei height = surface->pow2Height;
2425     const BYTE *mem = NULL;
2426     GLenum internal;
2427
2428     if (srgb)
2429     {
2430         internal = format->glGammaInternal;
2431     }
2432     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2433     {
2434         internal = format->rtInternal;
2435     }
2436     else
2437     {
2438         internal = format->glInternal;
2439     }
2440
2441     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2442
2443     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2444             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2445             internal, width, height, format->glFormat, format->glType);
2446
2447     ENTER_GL();
2448
2449     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2450     {
2451         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2452                 || !surface->resource.allocatedMemory)
2453         {
2454             /* In some cases we want to disable client storage.
2455              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2456              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2457              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2458              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2459              */
2460             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2461             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2462             surface->flags &= ~SFLAG_CLIENT;
2463             enable_client_storage = TRUE;
2464         }
2465         else
2466         {
2467             surface->flags |= SFLAG_CLIENT;
2468
2469             /* Point OpenGL to our allocated texture memory. Do not use
2470              * resource.allocatedMemory here because it might point into a
2471              * PBO. Instead use heapMemory, but get the alignment right. */
2472             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2473                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2474         }
2475     }
2476
2477     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2478     {
2479         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2480                 internal, width, height, 0, surface->resource.size, mem));
2481         checkGLcall("glCompressedTexImage2DARB");
2482     }
2483     else
2484     {
2485         glTexImage2D(surface->texture_target, surface->texture_level,
2486                 internal, width, height, 0, format->glFormat, format->glType, mem);
2487         checkGLcall("glTexImage2D");
2488     }
2489
2490     if(enable_client_storage) {
2491         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2492         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2493     }
2494     LEAVE_GL();
2495 }
2496
2497 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2498  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2499 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2500 /* GL locking is done by the caller */
2501 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2502 {
2503     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2504     struct wined3d_renderbuffer_entry *entry;
2505     GLuint renderbuffer = 0;
2506     unsigned int src_width, src_height;
2507     unsigned int width, height;
2508
2509     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2510     {
2511         width = rt->pow2Width;
2512         height = rt->pow2Height;
2513     }
2514     else
2515     {
2516         width = surface->pow2Width;
2517         height = surface->pow2Height;
2518     }
2519
2520     src_width = surface->pow2Width;
2521     src_height = surface->pow2Height;
2522
2523     /* A depth stencil smaller than the render target is not valid */
2524     if (width > src_width || height > src_height) return;
2525
2526     /* Remove any renderbuffer set if the sizes match */
2527     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2528             || (width == src_width && height == src_height))
2529     {
2530         surface->current_renderbuffer = NULL;
2531         return;
2532     }
2533
2534     /* Look if we've already got a renderbuffer of the correct dimensions */
2535     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2536     {
2537         if (entry->width == width && entry->height == height)
2538         {
2539             renderbuffer = entry->id;
2540             surface->current_renderbuffer = entry;
2541             break;
2542         }
2543     }
2544
2545     if (!renderbuffer)
2546     {
2547         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2548         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2549         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2550                 surface->resource.format->glInternal, width, height);
2551
2552         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2553         entry->width = width;
2554         entry->height = height;
2555         entry->id = renderbuffer;
2556         list_add_head(&surface->renderbuffers, &entry->entry);
2557
2558         surface->current_renderbuffer = entry;
2559     }
2560
2561     checkGLcall("set_compatible_renderbuffer");
2562 }
2563
2564 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2565 {
2566     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2567
2568     TRACE("surface %p.\n", surface);
2569
2570     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2571     {
2572         ERR("Surface %p is not on a swapchain.\n", surface);
2573         return GL_NONE;
2574     }
2575
2576     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2577     {
2578         if (swapchain->render_to_fbo)
2579         {
2580             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2581             return GL_COLOR_ATTACHMENT0;
2582         }
2583         TRACE("Returning GL_BACK\n");
2584         return GL_BACK;
2585     }
2586     else if (surface == swapchain->front_buffer)
2587     {
2588         TRACE("Returning GL_FRONT\n");
2589         return GL_FRONT;
2590     }
2591
2592     FIXME("Higher back buffer, returning GL_BACK\n");
2593     return GL_BACK;
2594 }
2595
2596 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2597 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2598 {
2599     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2600
2601     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2602         /* No partial locking for textures yet. */
2603         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2604
2605     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2606     if (dirty_rect)
2607     {
2608         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2609         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2610         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2611         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2612     }
2613     else
2614     {
2615         surface->dirtyRect.left = 0;
2616         surface->dirtyRect.top = 0;
2617         surface->dirtyRect.right = surface->resource.width;
2618         surface->dirtyRect.bottom = surface->resource.height;
2619     }
2620
2621     /* if the container is a texture then mark it dirty. */
2622     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2623     {
2624         TRACE("Passing to container.\n");
2625         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2626     }
2627 }
2628
2629 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2630 {
2631     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2632     BOOL ck_changed;
2633
2634     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2635
2636     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2637     {
2638         ERR("Not supported on scratch surfaces.\n");
2639         return WINED3DERR_INVALIDCALL;
2640     }
2641
2642     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2643
2644     /* Reload if either the texture and sysmem have different ideas about the
2645      * color key, or the actual key values changed. */
2646     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2647             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2648             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2649     {
2650         TRACE("Reloading because of color keying\n");
2651         /* To perform the color key conversion we need a sysmem copy of
2652          * the surface. Make sure we have it. */
2653
2654         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2655         /* Make sure the texture is reloaded because of the color key change,
2656          * this kills performance though :( */
2657         /* TODO: This is not necessarily needed with hw palettized texture support. */
2658         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2659         /* Switching color keying on / off may change the internal format. */
2660         if (ck_changed)
2661             surface_force_reload(surface);
2662     }
2663     else if (!(surface->flags & flag))
2664     {
2665         TRACE("Reloading because surface is dirty.\n");
2666     }
2667     else
2668     {
2669         TRACE("surface is already in texture\n");
2670         return WINED3D_OK;
2671     }
2672
2673     /* No partial locking for textures yet. */
2674     surface_load_location(surface, flag, NULL);
2675     surface_evict_sysmem(surface);
2676
2677     return WINED3D_OK;
2678 }
2679
/* Convert a 32-bit float to the bit pattern of a 16-bit float (1 sign bit,
 * 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) yields 0x0000, NaN yields 0x7c01, infinities and
 * values too large for 16 bits yield a signed infinity, and values too small
 * for a normalized result are encoded as denormals or flushed to zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale tmp into [2^10, 2^11), so that its integer part is the 11-bit
     * mantissa including the implicit leading one. */
    if (tmp < 1024.0f)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < 1024.0f);
    }
    else if (tmp >= 2048.0f)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= 2048.0f);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11-bit mantissa. Renormalize, since
     * otherwise the masking below would drop the carry and halve the
     * result. */
    if (mantissa == 0x800)
    {
        mantissa >>= 1;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2742
2743 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2744 {
2745     ULONG refcount;
2746
2747     TRACE("Surface %p, container %p of type %#x.\n",
2748             surface, surface->container.u.base, surface->container.type);
2749
2750     switch (surface->container.type)
2751     {
2752         case WINED3D_CONTAINER_TEXTURE:
2753             return wined3d_texture_incref(surface->container.u.texture);
2754
2755         case WINED3D_CONTAINER_SWAPCHAIN:
2756             return wined3d_swapchain_incref(surface->container.u.swapchain);
2757
2758         default:
2759             ERR("Unhandled container type %#x.\n", surface->container.type);
2760         case WINED3D_CONTAINER_NONE:
2761             break;
2762     }
2763
2764     refcount = InterlockedIncrement(&surface->resource.ref);
2765     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2766
2767     return refcount;
2768 }
2769
2770 /* Do not call while under the GL lock. */
2771 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2772 {
2773     ULONG refcount;
2774
2775     TRACE("Surface %p, container %p of type %#x.\n",
2776             surface, surface->container.u.base, surface->container.type);
2777
2778     switch (surface->container.type)
2779     {
2780         case WINED3D_CONTAINER_TEXTURE:
2781             return wined3d_texture_decref(surface->container.u.texture);
2782
2783         case WINED3D_CONTAINER_SWAPCHAIN:
2784             return wined3d_swapchain_decref(surface->container.u.swapchain);
2785
2786         default:
2787             ERR("Unhandled container type %#x.\n", surface->container.type);
2788         case WINED3D_CONTAINER_NONE:
2789             break;
2790     }
2791
2792     refcount = InterlockedDecrement(&surface->resource.ref);
2793     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2794
2795     if (!refcount)
2796     {
2797         surface->surface_ops->surface_cleanup(surface);
2798         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2799
2800         TRACE("Destroyed surface %p.\n", surface);
2801         HeapFree(GetProcessHeap(), 0, surface);
2802     }
2803
2804     return refcount;
2805 }
2806
2807 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2808 {
2809     return resource_set_priority(&surface->resource, priority);
2810 }
2811
2812 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2813 {
2814     return resource_get_priority(&surface->resource);
2815 }
2816
2817 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2818 {
2819     TRACE("surface %p.\n", surface);
2820
2821     surface->surface_ops->surface_preload(surface);
2822 }
2823
2824 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2825 {
2826     TRACE("surface %p.\n", surface);
2827
2828     return surface->resource.parent;
2829 }
2830
2831 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2832 {
2833     TRACE("surface %p.\n", surface);
2834
2835     return &surface->resource;
2836 }
2837
2838 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2839 {
2840     TRACE("surface %p, flags %#x.\n", surface, flags);
2841
2842     switch (flags)
2843     {
2844         case WINEDDGBS_CANBLT:
2845         case WINEDDGBS_ISBLTDONE:
2846             return WINED3D_OK;
2847
2848         default:
2849             return WINED3DERR_INVALIDCALL;
2850     }
2851 }
2852
2853 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2854 {
2855     TRACE("surface %p, flags %#x.\n", surface, flags);
2856
2857     /* XXX: DDERR_INVALIDSURFACETYPE */
2858
2859     switch (flags)
2860     {
2861         case WINEDDGFS_CANFLIP:
2862         case WINEDDGFS_ISFLIPDONE:
2863             return WINED3D_OK;
2864
2865         default:
2866             return WINED3DERR_INVALIDCALL;
2867     }
2868 }
2869
2870 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2871 {
2872     TRACE("surface %p.\n", surface);
2873
2874     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2875     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2876 }
2877
2878 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2879 {
2880     TRACE("surface %p.\n", surface);
2881
2882     /* So far we don't lose anything :) */
2883     surface->flags &= ~SFLAG_LOST;
2884     return WINED3D_OK;
2885 }
2886
2887 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2888 {
2889     TRACE("surface %p, palette %p.\n", surface, palette);
2890
2891     if (surface->palette == palette)
2892     {
2893         TRACE("Nop palette change.\n");
2894         return WINED3D_OK;
2895     }
2896
2897     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2898         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2899
2900     surface->palette = palette;
2901
2902     if (palette)
2903     {
2904         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2905             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2906
2907         surface->surface_ops->surface_realize_palette(surface);
2908     }
2909
2910     return WINED3D_OK;
2911 }
2912
2913 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2914         DWORD flags, const WINEDDCOLORKEY *color_key)
2915 {
2916     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2917
2918     if (flags & WINEDDCKEY_COLORSPACE)
2919     {
2920         FIXME(" colorkey value not supported (%08x) !\n", flags);
2921         return WINED3DERR_INVALIDCALL;
2922     }
2923
2924     /* Dirtify the surface, but only if a key was changed. */
2925     if (color_key)
2926     {
2927         switch (flags & ~WINEDDCKEY_COLORSPACE)
2928         {
2929             case WINEDDCKEY_DESTBLT:
2930                 surface->DestBltCKey = *color_key;
2931                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
2932                 break;
2933
2934             case WINEDDCKEY_DESTOVERLAY:
2935                 surface->DestOverlayCKey = *color_key;
2936                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
2937                 break;
2938
2939             case WINEDDCKEY_SRCOVERLAY:
2940                 surface->SrcOverlayCKey = *color_key;
2941                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
2942                 break;
2943
2944             case WINEDDCKEY_SRCBLT:
2945                 surface->SrcBltCKey = *color_key;
2946                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
2947                 break;
2948         }
2949     }
2950     else
2951     {
2952         switch (flags & ~WINEDDCKEY_COLORSPACE)
2953         {
2954             case WINEDDCKEY_DESTBLT:
2955                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
2956                 break;
2957
2958             case WINEDDCKEY_DESTOVERLAY:
2959                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
2960                 break;
2961
2962             case WINEDDCKEY_SRCOVERLAY:
2963                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
2964                 break;
2965
2966             case WINEDDCKEY_SRCBLT:
2967                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
2968                 break;
2969         }
2970     }
2971
2972     return WINED3D_OK;
2973 }
2974
2975 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
2976 {
2977     TRACE("surface %p.\n", surface);
2978
2979     return surface->palette;
2980 }
2981
2982 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
2983 {
2984     const struct wined3d_format *format = surface->resource.format;
2985     DWORD pitch;
2986
2987     TRACE("surface %p.\n", surface);
2988
2989     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
2990     {
2991         /* Since compressed formats are block based, pitch means the amount of
2992          * bytes to the next row of block rather than the next row of pixels. */
2993         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
2994         pitch = row_block_count * format->block_byte_count;
2995     }
2996     else
2997     {
2998         unsigned char alignment = surface->resource.device->surface_alignment;
2999         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3000         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3001     }
3002
3003     TRACE("Returning %u.\n", pitch);
3004
3005     return pitch;
3006 }
3007
3008 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3009 {
3010     TRACE("surface %p, mem %p.\n", surface, mem);
3011
3012     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3013     {
3014         WARN("Surface is locked or the DC is in use.\n");
3015         return WINED3DERR_INVALIDCALL;
3016     }
3017
3018     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3019     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3020     {
3021         ERR("Not supported on render targets.\n");
3022         return WINED3DERR_INVALIDCALL;
3023     }
3024
3025     if (mem && mem != surface->resource.allocatedMemory)
3026     {
3027         void *release = NULL;
3028
3029         /* Do I have to copy the old surface content? */
3030         if (surface->flags & SFLAG_DIBSECTION)
3031         {
3032             SelectObject(surface->hDC, surface->dib.holdbitmap);
3033             DeleteDC(surface->hDC);
3034             /* Release the DIB section. */
3035             DeleteObject(surface->dib.DIBsection);
3036             surface->dib.bitmap_data = NULL;
3037             surface->resource.allocatedMemory = NULL;
3038             surface->hDC = NULL;
3039             surface->flags &= ~SFLAG_DIBSECTION;
3040         }
3041         else if (!(surface->flags & SFLAG_USERPTR))
3042         {
3043             release = surface->resource.heapMemory;
3044             surface->resource.heapMemory = NULL;
3045         }
3046         surface->resource.allocatedMemory = mem;
3047         surface->flags |= SFLAG_USERPTR;
3048
3049         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3050         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3051
3052         /* For client textures OpenGL has to be notified. */
3053         if (surface->flags & SFLAG_CLIENT)
3054             surface_release_client_storage(surface);
3055
3056         /* Now free the old memory if any. */
3057         HeapFree(GetProcessHeap(), 0, release);
3058     }
3059     else if (surface->flags & SFLAG_USERPTR)
3060     {
3061         /* HeapMemory should be NULL already. */
3062         if (surface->resource.heapMemory)
3063             ERR("User pointer surface has heap memory allocated.\n");
3064
3065         if (!mem)
3066         {
3067             surface->resource.allocatedMemory = NULL;
3068             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3069
3070             if (surface->flags & SFLAG_CLIENT)
3071                 surface_release_client_storage(surface);
3072
3073             surface_prepare_system_memory(surface);
3074         }
3075
3076         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3077     }
3078
3079     return WINED3D_OK;
3080 }
3081
3082 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3083 {
3084     LONG w, h;
3085
3086     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3087
3088     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3089     {
3090         WARN("Not an overlay surface.\n");
3091         return WINEDDERR_NOTAOVERLAYSURFACE;
3092     }
3093
3094     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3095     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3096     surface->overlay_destrect.left = x;
3097     surface->overlay_destrect.top = y;
3098     surface->overlay_destrect.right = x + w;
3099     surface->overlay_destrect.bottom = y + h;
3100
3101     surface->surface_ops->surface_draw_overlay(surface);
3102
3103     return WINED3D_OK;
3104 }
3105
3106 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3107 {
3108     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3109
3110     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3111     {
3112         TRACE("Not an overlay surface.\n");
3113         return WINEDDERR_NOTAOVERLAYSURFACE;
3114     }
3115
3116     if (!surface->overlay_dest)
3117     {
3118         TRACE("Overlay not visible.\n");
3119         *x = 0;
3120         *y = 0;
3121         return WINEDDERR_OVERLAYNOTVISIBLE;
3122     }
3123
3124     *x = surface->overlay_destrect.left;
3125     *y = surface->overlay_destrect.top;
3126
3127     TRACE("Returning position %d, %d.\n", *x, *y);
3128
3129     return WINED3D_OK;
3130 }
3131
3132 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3133         DWORD flags, struct wined3d_surface *ref)
3134 {
3135     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3136
3137     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3138     {
3139         TRACE("Not an overlay surface.\n");
3140         return WINEDDERR_NOTAOVERLAYSURFACE;
3141     }
3142
3143     return WINED3D_OK;
3144 }
3145
3146 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3147         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3148 {
3149     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3150             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3151
3152     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3153     {
3154         WARN("Not an overlay surface.\n");
3155         return WINEDDERR_NOTAOVERLAYSURFACE;
3156     }
3157     else if (!dst_surface)
3158     {
3159         WARN("Dest surface is NULL.\n");
3160         return WINED3DERR_INVALIDCALL;
3161     }
3162
3163     if (src_rect)
3164     {
3165         surface->overlay_srcrect = *src_rect;
3166     }
3167     else
3168     {
3169         surface->overlay_srcrect.left = 0;
3170         surface->overlay_srcrect.top = 0;
3171         surface->overlay_srcrect.right = surface->resource.width;
3172         surface->overlay_srcrect.bottom = surface->resource.height;
3173     }
3174
3175     if (dst_rect)
3176     {
3177         surface->overlay_destrect = *dst_rect;
3178     }
3179     else
3180     {
3181         surface->overlay_destrect.left = 0;
3182         surface->overlay_destrect.top = 0;
3183         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3184         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3185     }
3186
3187     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3188     {
3189         list_remove(&surface->overlay_entry);
3190     }
3191
3192     if (flags & WINEDDOVER_SHOW)
3193     {
3194         if (surface->overlay_dest != dst_surface)
3195         {
3196             surface->overlay_dest = dst_surface;
3197             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3198         }
3199     }
3200     else if (flags & WINEDDOVER_HIDE)
3201     {
3202         /* tests show that the rectangles are erased on hide */
3203         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3204         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3205         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3206         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3207         surface->overlay_dest = NULL;
3208     }
3209
3210     surface->surface_ops->surface_draw_overlay(surface);
3211
3212     return WINED3D_OK;
3213 }
3214
3215 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3216 {
3217     TRACE("surface %p, clipper %p.\n", surface, clipper);
3218
3219     surface->clipper = clipper;
3220
3221     return WINED3D_OK;
3222 }
3223
3224 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3225 {
3226     TRACE("surface %p.\n", surface);
3227
3228     return surface->clipper;
3229 }
3230
3231 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3232 {
3233     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3234
3235     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3236
3237     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3238     {
3239         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3240         return WINED3DERR_INVALIDCALL;
3241     }
3242
3243     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3244             surface->pow2Width, surface->pow2Height);
3245     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3246     surface->resource.format = format;
3247
3248     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3249     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3250             format->glFormat, format->glInternal, format->glType);
3251
3252     return WINED3D_OK;
3253 }
3254
3255 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3256         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3257 {
3258     unsigned short *dst_s;
3259     const float *src_f;
3260     unsigned int x, y;
3261
3262     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3263
3264     for (y = 0; y < h; ++y)
3265     {
3266         src_f = (const float *)(src + y * pitch_in);
3267         dst_s = (unsigned short *) (dst + y * pitch_out);
3268         for (x = 0; x < w; ++x)
3269         {
3270             dst_s[x] = float_32_to_16(src_f + x);
3271         }
3272     }
3273 }
3274
3275 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3276         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3277 {
3278     static const unsigned char convert_5to8[] =
3279     {
3280         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3281         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3282         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3283         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3284     };
3285     static const unsigned char convert_6to8[] =
3286     {
3287         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3288         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3289         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3290         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3291         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3292         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3293         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3294         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3295     };
3296     unsigned int x, y;
3297
3298     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3299
3300     for (y = 0; y < h; ++y)
3301     {
3302         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3303         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3304         for (x = 0; x < w; ++x)
3305         {
3306             WORD pixel = src_line[x];
3307             dst_line[x] = 0xff000000
3308                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3309                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3310                     | convert_5to8[(pixel & 0x001f)];
3311         }
3312     }
3313 }
3314
3315 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3316  * in both cases we're just setting the X / Alpha channel to 0xff. */
3317 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3318         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3319 {
3320     unsigned int x, y;
3321
3322     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3323
3324     for (y = 0; y < h; ++y)
3325     {
3326         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3327         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3328
3329         for (x = 0; x < w; ++x)
3330         {
3331             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3332         }
3333     }
3334 }
3335
3336 static inline BYTE cliptobyte(int x)
3337 {
3338     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3339 }
3340
3341 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3342         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3343 {
3344     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3345     unsigned int x, y;
3346
3347     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3348
3349     for (y = 0; y < h; ++y)
3350     {
3351         const BYTE *src_line = src + y * pitch_in;
3352         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3353         for (x = 0; x < w; ++x)
3354         {
3355             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3356              *     C = Y - 16; D = U - 128; E = V - 128;
3357              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3358              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3359              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3360              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3361              * U and V are shared between the pixels. */
3362             if (!(x & 1)) /* For every even pixel, read new U and V. */
3363             {
3364                 d = (int) src_line[1] - 128;
3365                 e = (int) src_line[3] - 128;
3366                 r2 = 409 * e + 128;
3367                 g2 = - 100 * d - 208 * e + 128;
3368                 b2 = 516 * d + 128;
3369             }
3370             c2 = 298 * ((int) src_line[0] - 16);
3371             dst_line[x] = 0xff000000
3372                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3373                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3374                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3375                 /* Scale RGB values to 0..255 range,
3376                  * then clip them if still not in range (may be negative),
3377                  * then shift them within DWORD if necessary. */
3378             src_line += 2;
3379         }
3380     }
3381 }
3382
3383 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3384         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3385 {
3386     unsigned int x, y;
3387     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3388
3389     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3390
3391     for (y = 0; y < h; ++y)
3392     {
3393         const BYTE *src_line = src + y * pitch_in;
3394         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3395         for (x = 0; x < w; ++x)
3396         {
3397             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3398              *     C = Y - 16; D = U - 128; E = V - 128;
3399              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3400              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3401              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3402              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3403              * U and V are shared between the pixels. */
3404             if (!(x & 1)) /* For every even pixel, read new U and V. */
3405             {
3406                 d = (int) src_line[1] - 128;
3407                 e = (int) src_line[3] - 128;
3408                 r2 = 409 * e + 128;
3409                 g2 = - 100 * d - 208 * e + 128;
3410                 b2 = 516 * d + 128;
3411             }
3412             c2 = 298 * ((int) src_line[0] - 16);
3413             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3414                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3415                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3416                 /* Scale RGB values to 0..255 range,
3417                  * then clip them if still not in range (may be negative),
3418                  * then shift them within DWORD if necessary. */
3419             src_line += 2;
3420         }
3421     }
3422 }
3423
3424 struct d3dfmt_convertor_desc
3425 {
3426     enum wined3d_format_id from, to;
3427     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3428 };
3429
3430 static const struct d3dfmt_convertor_desc convertors[] =
3431 {
3432     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3433     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3434     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3435     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3436     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3437     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3438 };
3439
3440 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3441         enum wined3d_format_id to)
3442 {
3443     unsigned int i;
3444
3445     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3446     {
3447         if (convertors[i].from == from && convertors[i].to == to)
3448             return &convertors[i];
3449     }
3450
3451     return NULL;
3452 }
3453
3454 /*****************************************************************************
3455  * surface_convert_format
3456  *
3457  * Creates a duplicate of a surface in a different format. Is used by Blt to
3458  * blit between surfaces with different formats.
3459  *
3460  * Parameters
3461  *  source: Source surface
3462  *  fmt: Requested destination format
3463  *
3464  *****************************************************************************/
3465 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3466 {
3467     const struct d3dfmt_convertor_desc *conv;
3468     WINED3DLOCKED_RECT lock_src, lock_dst;
3469     struct wined3d_surface *ret = NULL;
3470     HRESULT hr;
3471
3472     conv = find_convertor(source->resource.format->id, to_fmt);
3473     if (!conv)
3474     {
3475         FIXME("Cannot find a conversion function from format %s to %s.\n",
3476                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3477         return NULL;
3478     }
3479
3480     wined3d_surface_create(source->resource.device, source->resource.width,
3481             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3482             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3483             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3484     if (!ret)
3485     {
3486         ERR("Failed to create a destination surface for conversion.\n");
3487         return NULL;
3488     }
3489
3490     memset(&lock_src, 0, sizeof(lock_src));
3491     memset(&lock_dst, 0, sizeof(lock_dst));
3492
3493     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3494     if (FAILED(hr))
3495     {
3496         ERR("Failed to lock the source surface.\n");
3497         wined3d_surface_decref(ret);
3498         return NULL;
3499     }
3500     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3501     if (FAILED(hr))
3502     {
3503         ERR("Failed to lock the destination surface.\n");
3504         wined3d_surface_unmap(source);
3505         wined3d_surface_decref(ret);
3506         return NULL;
3507     }
3508
3509     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3510             source->resource.width, source->resource.height);
3511
3512     wined3d_surface_unmap(ret);
3513     wined3d_surface_unmap(source);
3514
3515     return ret;
3516 }
3517
3518 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3519         unsigned int bpp, UINT pitch, DWORD color)
3520 {
3521     BYTE *first;
3522     int x, y;
3523
3524     /* Do first row */
3525
3526 #define COLORFILL_ROW(type) \
3527 do { \
3528     type *d = (type *)buf; \
3529     for (x = 0; x < width; ++x) \
3530         d[x] = (type)color; \
3531 } while(0)
3532
3533     switch (bpp)
3534     {
3535         case 1:
3536             COLORFILL_ROW(BYTE);
3537             break;
3538
3539         case 2:
3540             COLORFILL_ROW(WORD);
3541             break;
3542
3543         case 3:
3544         {
3545             BYTE *d = buf;
3546             for (x = 0; x < width; ++x, d += 3)
3547             {
3548                 d[0] = (color      ) & 0xFF;
3549                 d[1] = (color >>  8) & 0xFF;
3550                 d[2] = (color >> 16) & 0xFF;
3551             }
3552             break;
3553         }
3554         case 4:
3555             COLORFILL_ROW(DWORD);
3556             break;
3557
3558         default:
3559             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3560             return WINED3DERR_NOTAVAILABLE;
3561     }
3562
3563 #undef COLORFILL_ROW
3564
3565     /* Now copy first row. */
3566     first = buf;
3567     for (y = 1; y < height; ++y)
3568     {
3569         buf += pitch;
3570         memcpy(buf, first, width * bpp);
3571     }
3572
3573     return WINED3D_OK;
3574 }
3575
3576 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3577 {
3578     TRACE("surface %p.\n", surface);
3579
3580     if (!(surface->flags & SFLAG_LOCKED))
3581     {
3582         WARN("Trying to unmap unmapped surface.\n");
3583         return WINEDDERR_NOTLOCKED;
3584     }
3585     surface->flags &= ~SFLAG_LOCKED;
3586
3587     surface->surface_ops->surface_unmap(surface);
3588
3589     return WINED3D_OK;
3590 }
3591
3592 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3593         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3594 {
3595     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3596             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3597
3598     if (surface->flags & SFLAG_LOCKED)
3599     {
3600         WARN("Surface is already mapped.\n");
3601         return WINED3DERR_INVALIDCALL;
3602     }
3603     surface->flags |= SFLAG_LOCKED;
3604
3605     if (!(surface->flags & SFLAG_LOCKABLE))
3606         WARN("Trying to lock unlockable surface.\n");
3607
3608     surface->surface_ops->surface_map(surface, rect, flags);
3609
3610     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3611
3612     if (!rect)
3613     {
3614         locked_rect->pBits = surface->resource.allocatedMemory;
3615         surface->lockedRect.left = 0;
3616         surface->lockedRect.top = 0;
3617         surface->lockedRect.right = surface->resource.width;
3618         surface->lockedRect.bottom = surface->resource.height;
3619     }
3620     else
3621     {
3622         const struct wined3d_format *format = surface->resource.format;
3623
3624         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3625         {
3626             /* Compressed textures are block based, so calculate the offset of
3627              * the block that contains the top-left pixel of the locked rectangle. */
3628             locked_rect->pBits = surface->resource.allocatedMemory
3629                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3630                     + ((rect->left / format->block_width) * format->block_byte_count);
3631         }
3632         else
3633         {
3634             locked_rect->pBits = surface->resource.allocatedMemory
3635                     + (locked_rect->Pitch * rect->top)
3636                     + (rect->left * format->byte_count);
3637         }
3638         surface->lockedRect.left = rect->left;
3639         surface->lockedRect.top = rect->top;
3640         surface->lockedRect.right = rect->right;
3641         surface->lockedRect.bottom = rect->bottom;
3642     }
3643
3644     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3645     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3646
3647     return WINED3D_OK;
3648 }
3649
3650 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3651 {
3652     HRESULT hr;
3653
3654     TRACE("surface %p, dc %p.\n", surface, dc);
3655
3656     if (surface->flags & SFLAG_USERPTR)
3657     {
3658         ERR("Not supported on surfaces with application-provided memory.\n");
3659         return WINEDDERR_NODC;
3660     }
3661
3662     /* Give more detailed info for ddraw. */
3663     if (surface->flags & SFLAG_DCINUSE)
3664         return WINEDDERR_DCALREADYCREATED;
3665
3666     /* Can't GetDC if the surface is locked. */
3667     if (surface->flags & SFLAG_LOCKED)
3668         return WINED3DERR_INVALIDCALL;
3669
3670     hr = surface->surface_ops->surface_getdc(surface);
3671     if (FAILED(hr))
3672         return hr;
3673
3674     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3675             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3676     {
3677         /* GetDC on palettized formats is unsupported in D3D9, and the method
3678          * is missing in D3D8, so this should only be used for DX <=7
3679          * surfaces (with non-device palettes). */
3680         const PALETTEENTRY *pal = NULL;
3681
3682         if (surface->palette)
3683         {
3684             pal = surface->palette->palents;
3685         }
3686         else
3687         {
3688             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3689             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3690
3691             if (dds_primary && dds_primary->palette)
3692                 pal = dds_primary->palette->palents;
3693         }
3694
3695         if (pal)
3696         {
3697             RGBQUAD col[256];
3698             unsigned int i;
3699
3700             for (i = 0; i < 256; ++i)
3701             {
3702                 col[i].rgbRed = pal[i].peRed;
3703                 col[i].rgbGreen = pal[i].peGreen;
3704                 col[i].rgbBlue = pal[i].peBlue;
3705                 col[i].rgbReserved = 0;
3706             }
3707             SetDIBColorTable(surface->hDC, 0, 256, col);
3708         }
3709     }
3710
3711     surface->flags |= SFLAG_DCINUSE;
3712
3713     *dc = surface->hDC;
3714     TRACE("Returning dc %p.\n", *dc);
3715
3716     return WINED3D_OK;
3717 }
3718
3719 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3720 {
3721     TRACE("surface %p, dc %p.\n", surface, dc);
3722
3723     if (!(surface->flags & SFLAG_DCINUSE))
3724         return WINEDDERR_NODC;
3725
3726     if (surface->hDC != dc)
3727     {
3728         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3729                 dc, surface->hDC);
3730         return WINEDDERR_NODC;
3731     }
3732
3733     /* Copy the contents of the DIB over to the PBO. */
3734     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3735         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3736
3737     /* We locked first, so unlock now. */
3738     wined3d_surface_unmap(surface);
3739
3740     surface->flags &= ~SFLAG_DCINUSE;
3741
3742     return WINED3D_OK;
3743 }
3744
3745 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3746 {
3747     struct wined3d_swapchain *swapchain;
3748
3749     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3750
3751     if (flags)
3752     {
3753         static UINT once;
3754         if (!once++)
3755             FIXME("Ignoring flags %#x.\n", flags);
3756         else
3757             WARN("Ignoring flags %#x.\n", flags);
3758     }
3759
3760     /* FIXME: This will also prevent overlay flips, since overlays aren't on
3761      * a swapchain either. */
3762     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
3763     {
3764         ERR("Flipped surface is not on a swapchain.\n");
3765         return WINEDDERR_NOTFLIPPABLE;
3766     }
3767     swapchain = surface->container.u.swapchain;
3768
3769     /* Flipping is only supported on render targets and overlays. */
3770     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3771     {
3772         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3773         return WINEDDERR_NOTFLIPPABLE;
3774     }
3775
3776     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
3777     {
3778         flip_surface(surface, override);
3779
3780         /* Update the overlay if it is visible */
3781         if (surface->overlay_dest)
3782             return surface->surface_ops->surface_draw_overlay(surface);
3783         else
3784             return WINED3D_OK;
3785     }
3786
3787     return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
3788 }
3789
3790 /* Do not call while under the GL lock. */
3791 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3792 {
3793     struct wined3d_device *device = surface->resource.device;
3794
3795     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3796
3797     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3798     {
3799         struct wined3d_texture *texture = surface->container.u.texture;
3800
3801         TRACE("Passing to container (%p).\n", texture);
3802         texture->texture_ops->texture_preload(texture, srgb);
3803     }
3804     else
3805     {
3806         struct wined3d_context *context;
3807
3808         TRACE("(%p) : About to load surface\n", surface);
3809
3810         /* TODO: Use already acquired context when possible. */
3811         context = context_acquire(device, NULL);
3812
3813         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3814                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3815         {
3816             if (palette9_changed(surface))
3817             {
3818                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3819                 /* TODO: This is not necessarily needed with hw palettized texture support */
3820                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3821                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3822                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3823             }
3824         }
3825
3826         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3827
3828         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3829         {
3830             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3831             GLclampf tmp;
3832             tmp = 0.9f;
3833             ENTER_GL();
3834             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3835             LEAVE_GL();
3836         }
3837
3838         context_release(context);
3839     }
3840 }
3841
3842 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3843 {
3844     if (!surface->resource.allocatedMemory)
3845     {
3846         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3847                 surface->resource.size + RESOURCE_ALIGNMENT);
3848         if (!surface->resource.heapMemory)
3849         {
3850             ERR("Out of memory\n");
3851             return FALSE;
3852         }
3853         surface->resource.allocatedMemory =
3854             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3855     }
3856     else
3857     {
3858         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3859     }
3860
3861     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3862
3863     return TRUE;
3864 }
3865
3866 /* Read the framebuffer back into the surface */
3867 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3868 {
3869     struct wined3d_device *device = surface->resource.device;
3870     const struct wined3d_gl_info *gl_info;
3871     struct wined3d_context *context;
3872     BYTE *mem;
3873     GLint fmt;
3874     GLint type;
3875     BYTE *row, *top, *bottom;
3876     int i;
3877     BOOL bpp;
3878     RECT local_rect;
3879     BOOL srcIsUpsideDown;
3880     GLint rowLen = 0;
3881     GLint skipPix = 0;
3882     GLint skipRow = 0;
3883
3884     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3885         static BOOL warned = FALSE;
3886         if(!warned) {
3887             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3888             warned = TRUE;
3889         }
3890         return;
3891     }
3892
3893     context = context_acquire(device, surface);
3894     context_apply_blit_state(context, device);
3895     gl_info = context->gl_info;
3896
3897     ENTER_GL();
3898
3899     /* Select the correct read buffer, and give some debug output.
3900      * There is no need to keep track of the current read buffer or reset it, every part of the code
3901      * that reads sets the read buffer as desired.
3902      */
3903     if (surface_is_offscreen(surface))
3904     {
3905         /* Mapping the primary render target which is not on a swapchain.
3906          * Read from the back buffer. */
3907         TRACE("Mapping offscreen render target.\n");
3908         glReadBuffer(device->offscreenBuffer);
3909         srcIsUpsideDown = TRUE;
3910     }
3911     else
3912     {
3913         /* Onscreen surfaces are always part of a swapchain */
3914         GLenum buffer = surface_get_gl_buffer(surface);
3915         TRACE("Mapping %#x buffer.\n", buffer);
3916         glReadBuffer(buffer);
3917         checkGLcall("glReadBuffer");
3918         srcIsUpsideDown = FALSE;
3919     }
3920
3921     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3922     if (!rect)
3923     {
3924         local_rect.left = 0;
3925         local_rect.top = 0;
3926         local_rect.right = surface->resource.width;
3927         local_rect.bottom = surface->resource.height;
3928     }
3929     else
3930     {
3931         local_rect = *rect;
3932     }
3933     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3934
3935     switch (surface->resource.format->id)
3936     {
3937         case WINED3DFMT_P8_UINT:
3938         {
3939             if (primary_render_target_is_p8(device))
3940             {
3941                 /* In case of P8 render targets the index is stored in the alpha component */
3942                 fmt = GL_ALPHA;
3943                 type = GL_UNSIGNED_BYTE;
3944                 mem = dest;
3945                 bpp = surface->resource.format->byte_count;
3946             }
3947             else
3948             {
3949                 /* GL can't return palettized data, so read ARGB pixels into a
3950                  * separate block of memory and convert them into palettized format
3951                  * in software. Slow, but if the app means to use palettized render
3952                  * targets and locks it...
3953                  *
3954                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
3955                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
3956                  * for the color channels when palettizing the colors.
3957                  */
3958                 fmt = GL_RGB;
3959                 type = GL_UNSIGNED_BYTE;
3960                 pitch *= 3;
3961                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
3962                 if (!mem)
3963                 {
3964                     ERR("Out of memory\n");
3965                     LEAVE_GL();
3966                     return;
3967                 }
3968                 bpp = surface->resource.format->byte_count * 3;
3969             }
3970         }
3971         break;
3972
3973         default:
3974             mem = dest;
3975             fmt = surface->resource.format->glFormat;
3976             type = surface->resource.format->glType;
3977             bpp = surface->resource.format->byte_count;
3978     }
3979
3980     if (surface->flags & SFLAG_PBO)
3981     {
3982         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
3983         checkGLcall("glBindBufferARB");
3984         if (mem)
3985         {
3986             ERR("mem not null for pbo -- unexpected\n");
3987             mem = NULL;
3988         }
3989     }
3990
3991     /* Save old pixel store pack state */
3992     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
3993     checkGLcall("glGetIntegerv");
3994     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
3995     checkGLcall("glGetIntegerv");
3996     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
3997     checkGLcall("glGetIntegerv");
3998
3999     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4000     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4001     checkGLcall("glPixelStorei");
4002     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4003     checkGLcall("glPixelStorei");
4004     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4005     checkGLcall("glPixelStorei");
4006
4007     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4008             local_rect.right - local_rect.left,
4009             local_rect.bottom - local_rect.top,
4010             fmt, type, mem);
4011     checkGLcall("glReadPixels");
4012
4013     /* Reset previous pixel store pack state */
4014     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4015     checkGLcall("glPixelStorei");
4016     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4017     checkGLcall("glPixelStorei");
4018     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4019     checkGLcall("glPixelStorei");
4020
4021     if (surface->flags & SFLAG_PBO)
4022     {
4023         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4024         checkGLcall("glBindBufferARB");
4025
4026         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4027          * to get a pointer to it and perform the flipping in software. This is a lot
4028          * faster than calling glReadPixels for each line. In case we want more speed
4029          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4030         if (!srcIsUpsideDown)
4031         {
4032             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4033             checkGLcall("glBindBufferARB");
4034
4035             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4036             checkGLcall("glMapBufferARB");
4037         }
4038     }
4039
4040     /* TODO: Merge this with the palettization loop below for P8 targets */
4041     if(!srcIsUpsideDown) {
4042         UINT len, off;
4043         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4044             Flip the lines in software */
4045         len = (local_rect.right - local_rect.left) * bpp;
4046         off = local_rect.left * bpp;
4047
4048         row = HeapAlloc(GetProcessHeap(), 0, len);
4049         if(!row) {
4050             ERR("Out of memory\n");
4051             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4052                 HeapFree(GetProcessHeap(), 0, mem);
4053             LEAVE_GL();
4054             return;
4055         }
4056
4057         top = mem + pitch * local_rect.top;
4058         bottom = mem + pitch * (local_rect.bottom - 1);
4059         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4060             memcpy(row, top + off, len);
4061             memcpy(top + off, bottom + off, len);
4062             memcpy(bottom + off, row, len);
4063             top += pitch;
4064             bottom -= pitch;
4065         }
4066         HeapFree(GetProcessHeap(), 0, row);
4067
4068         /* Unmap the temp PBO buffer */
4069         if (surface->flags & SFLAG_PBO)
4070         {
4071             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4072             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4073         }
4074     }
4075
4076     LEAVE_GL();
4077     context_release(context);
4078
4079     /* For P8 textures we need to perform an inverse palette lookup. This is
4080      * done by searching for a palette index which matches the RGB value.
4081      * Note this isn't guaranteed to work when there are multiple entries for
4082      * the same color but we have no choice. In case of P8 render targets,
4083      * the index is stored in the alpha component so no conversion is needed. */
4084     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4085     {
4086         const PALETTEENTRY *pal = NULL;
4087         DWORD width = pitch / 3;
4088         int x, y, c;
4089
4090         if (surface->palette)
4091         {
4092             pal = surface->palette->palents;
4093         }
4094         else
4095         {
4096             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4097             HeapFree(GetProcessHeap(), 0, mem);
4098             return;
4099         }
4100
4101         for(y = local_rect.top; y < local_rect.bottom; y++) {
4102             for(x = local_rect.left; x < local_rect.right; x++) {
4103                 /*                      start              lines            pixels      */
4104                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4105                 const BYTE *green = blue  + 1;
4106                 const BYTE *red = green + 1;
4107
4108                 for(c = 0; c < 256; c++) {
4109                     if(*red   == pal[c].peRed   &&
4110                        *green == pal[c].peGreen &&
4111                        *blue  == pal[c].peBlue)
4112                     {
4113                         *((BYTE *) dest + y * width + x) = c;
4114                         break;
4115                     }
4116                 }
4117             }
4118         }
4119         HeapFree(GetProcessHeap(), 0, mem);
4120     }
4121 }
4122
4123 /* Read the framebuffer contents into a texture */
4124 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
4125 {
4126     struct wined3d_device *device = surface->resource.device;
4127     struct wined3d_context *context;
4128
4129     if (!surface_is_offscreen(surface))
4130     {
4131         /* We would need to flip onscreen surfaces, but there's no efficient
4132          * way to do that here. It makes more sense for the caller to
4133          * explicitly go through sysmem. */
4134         ERR("Not supported for onscreen targets.\n");
4135         return;
4136     }
4137
4138     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4139      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4140      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4141      */
4142     context = context_acquire(device, surface);
4143     device_invalidate_state(device, STATE_FRAMEBUFFER);
4144
4145     surface_prepare_texture(surface, context, srgb);
4146     surface_bind_and_dirtify(surface, context, srgb);
4147
4148     TRACE("Reading back offscreen render target %p.\n", surface);
4149
4150     ENTER_GL();
4151
4152     glReadBuffer(device->offscreenBuffer);
4153     checkGLcall("glReadBuffer");
4154
4155     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4156             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4157     checkGLcall("glCopyTexSubImage2D");
4158
4159     LEAVE_GL();
4160
4161     context_release(context);
4162 }
4163
4164 /* Context activation is done by the caller. */
4165 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4166         struct wined3d_context *context, BOOL srgb)
4167 {
4168     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4169     CONVERT_TYPES convert;
4170     struct wined3d_format format;
4171
4172     if (surface->flags & alloc_flag) return;
4173
4174     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4175     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4176     else surface->flags &= ~SFLAG_CONVERTED;
4177
4178     surface_bind_and_dirtify(surface, context, srgb);
4179     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4180     surface->flags |= alloc_flag;
4181 }
4182
4183 /* Context activation is done by the caller. */
4184 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4185 {
4186     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4187     {
4188         struct wined3d_texture *texture = surface->container.u.texture;
4189         UINT sub_count = texture->level_count * texture->layer_count;
4190         UINT i;
4191
4192         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4193
4194         for (i = 0; i < sub_count; ++i)
4195         {
4196             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4197             surface_prepare_texture_internal(s, context, srgb);
4198         }
4199
4200         return;
4201     }
4202
4203     surface_prepare_texture_internal(surface, context, srgb);
4204 }
4205
4206 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4207 {
4208     if (multisample)
4209     {
4210         if (surface->rb_multisample)
4211             return;
4212
4213         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4214         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4215         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4216                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4217         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4218     }
4219     else
4220     {
4221         if (surface->rb_resolved)
4222             return;
4223
4224         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4225         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4226         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4227                 surface->pow2Width, surface->pow2Height);
4228         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4229     }
4230 }
4231
4232 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4233         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4234 {
4235     struct wined3d_device *device = surface->resource.device;
4236     UINT pitch = wined3d_surface_get_pitch(surface);
4237     const struct wined3d_gl_info *gl_info;
4238     struct wined3d_context *context;
4239     RECT local_rect;
4240     UINT w, h;
4241
4242     surface_get_rect(surface, rect, &local_rect);
4243
4244     mem += local_rect.top * pitch + local_rect.left * bpp;
4245     w = local_rect.right - local_rect.left;
4246     h = local_rect.bottom - local_rect.top;
4247
4248     /* Activate the correct context for the render target */
4249     context = context_acquire(device, surface);
4250     context_apply_blit_state(context, device);
4251     gl_info = context->gl_info;
4252
4253     ENTER_GL();
4254
4255     if (!surface_is_offscreen(surface))
4256     {
4257         GLenum buffer = surface_get_gl_buffer(surface);
4258         TRACE("Unlocking %#x buffer.\n", buffer);
4259         context_set_draw_buffer(context, buffer);
4260
4261         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4262         glPixelZoom(1.0f, -1.0f);
4263     }
4264     else
4265     {
4266         /* Primary offscreen render target */
4267         TRACE("Offscreen render target.\n");
4268         context_set_draw_buffer(context, device->offscreenBuffer);
4269
4270         glPixelZoom(1.0f, 1.0f);
4271     }
4272
4273     glRasterPos3i(local_rect.left, local_rect.top, 1);
4274     checkGLcall("glRasterPos3i");
4275
4276     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4277     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4278
4279     if (surface->flags & SFLAG_PBO)
4280     {
4281         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4282         checkGLcall("glBindBufferARB");
4283     }
4284
4285     glDrawPixels(w, h, fmt, type, mem);
4286     checkGLcall("glDrawPixels");
4287
4288     if (surface->flags & SFLAG_PBO)
4289     {
4290         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4291         checkGLcall("glBindBufferARB");
4292     }
4293
4294     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4295     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4296
4297     LEAVE_GL();
4298
4299     if (wined3d_settings.strict_draw_ordering
4300             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4301             && surface->container.u.swapchain->front_buffer == surface))
4302         wglFlush();
4303
4304     context_release(context);
4305 }
4306
4307 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4308         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4309 {
4310     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4311     const struct wined3d_device *device = surface->resource.device;
4312     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4313     BOOL blit_supported = FALSE;
4314
4315     /* Copy the default values from the surface. Below we might perform fixups */
4316     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4317     *format = *surface->resource.format;
4318     *convert = NO_CONVERSION;
4319
4320     /* Ok, now look if we have to do any conversion */
4321     switch (surface->resource.format->id)
4322     {
4323         case WINED3DFMT_P8_UINT:
4324             /* Below the call to blit_supported is disabled for Wine 1.2
4325              * because the function isn't operating correctly yet. At the
4326              * moment 8-bit blits are handled in software and if certain GL
4327              * extensions are around, surface conversion is performed at
4328              * upload time. The blit_supported call recognizes it as a
4329              * destination fixup. This type of upload 'fixup' and 8-bit to
4330              * 8-bit blits need to be handled by the blit_shader.
4331              * TODO: get rid of this #if 0. */
4332 #if 0
4333             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4334                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4335                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4336 #endif
4337             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4338
4339             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4340              * texturing. Further also use conversion in case of color keying.
4341              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4342              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4343              * conflicts with this.
4344              */
4345             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4346                     || colorkey_active || !use_texturing)
4347             {
4348                 format->glFormat = GL_RGBA;
4349                 format->glInternal = GL_RGBA;
4350                 format->glType = GL_UNSIGNED_BYTE;
4351                 format->conv_byte_count = 4;
4352                 if (colorkey_active)
4353                     *convert = CONVERT_PALETTED_CK;
4354                 else
4355                     *convert = CONVERT_PALETTED;
4356             }
4357             break;
4358
4359         case WINED3DFMT_B2G3R3_UNORM:
4360             /* **********************
4361                 GL_UNSIGNED_BYTE_3_3_2
4362                 ********************** */
4363             if (colorkey_active) {
4364                 /* This texture format will never be used.. So do not care about color keying
4365                     up until the point in time it will be needed :-) */
4366                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4367             }
4368             break;
4369
4370         case WINED3DFMT_B5G6R5_UNORM:
4371             if (colorkey_active)
4372             {
4373                 *convert = CONVERT_CK_565;
4374                 format->glFormat = GL_RGBA;
4375                 format->glInternal = GL_RGB5_A1;
4376                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4377                 format->conv_byte_count = 2;
4378             }
4379             break;
4380
4381         case WINED3DFMT_B5G5R5X1_UNORM:
4382             if (colorkey_active)
4383             {
4384                 *convert = CONVERT_CK_5551;
4385                 format->glFormat = GL_BGRA;
4386                 format->glInternal = GL_RGB5_A1;
4387                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4388                 format->conv_byte_count = 2;
4389             }
4390             break;
4391
4392         case WINED3DFMT_B8G8R8_UNORM:
4393             if (colorkey_active)
4394             {
4395                 *convert = CONVERT_CK_RGB24;
4396                 format->glFormat = GL_RGBA;
4397                 format->glInternal = GL_RGBA8;
4398                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4399                 format->conv_byte_count = 4;
4400             }
4401             break;
4402
4403         case WINED3DFMT_B8G8R8X8_UNORM:
4404             if (colorkey_active)
4405             {
4406                 *convert = CONVERT_RGB32_888;
4407                 format->glFormat = GL_RGBA;
4408                 format->glInternal = GL_RGBA8;
4409                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4410                 format->conv_byte_count = 4;
4411             }
4412             break;
4413
4414         default:
4415             break;
4416     }
4417
4418     return WINED3D_OK;
4419 }
4420
4421 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4422 {
4423     const struct wined3d_device *device = surface->resource.device;
4424     const struct wined3d_palette *pal = surface->palette;
4425     BOOL index_in_alpha = FALSE;
4426     unsigned int i;
4427
4428     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4429      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4430      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4431      * duplicate entries. Store the color key in the unused alpha component to speed the
4432      * download up and to make conversion unneeded. */
4433     index_in_alpha = primary_render_target_is_p8(device);
4434
4435     if (!pal)
4436     {
4437         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4438         if (device->wined3d->flags & WINED3D_PALETTE_PER_SURFACE)
4439         {
4440             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4441             if (index_in_alpha)
4442             {
4443                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4444                  * there's no palette at this time. */
4445                 for (i = 0; i < 256; i++) table[i][3] = i;
4446             }
4447         }
4448         else
4449         {
4450             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4451              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4452              * capability flag is present (wine does advertise this capability) */
4453             for (i = 0; i < 256; ++i)
4454             {
4455                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4456                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4457                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4458                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4459             }
4460         }
4461     }
4462     else
4463     {
4464         TRACE("Using surface palette %p\n", pal);
4465         /* Get the surface's palette */
4466         for (i = 0; i < 256; ++i)
4467         {
4468             table[i][0] = pal->palents[i].peRed;
4469             table[i][1] = pal->palents[i].peGreen;
4470             table[i][2] = pal->palents[i].peBlue;
4471
4472             /* When index_in_alpha is set the palette index is stored in the
4473              * alpha component. In case of a readback we can then read
4474              * GL_ALPHA. Color keying is handled in BltOverride using a
4475              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4476              * color key itself is passed to glAlphaFunc in other cases the
4477              * alpha component of pixels that should be masked away is set to 0. */
4478             if (index_in_alpha)
4479             {
4480                 table[i][3] = i;
4481             }
4482             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4483                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4484             {
4485                 table[i][3] = 0x00;
4486             }
4487             else if (pal->flags & WINEDDPCAPS_ALPHA)
4488             {
4489                 table[i][3] = pal->palents[i].peFlags;
4490             }
4491             else
4492             {
4493                 table[i][3] = 0xFF;
4494             }
4495         }
4496     }
4497 }
4498
4499 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4500         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4501 {
4502     const BYTE *source;
4503     BYTE *dest;
4504     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4505
4506     switch (convert) {
4507         case NO_CONVERSION:
4508         {
4509             memcpy(dst, src, pitch * height);
4510             break;
4511         }
4512         case CONVERT_PALETTED:
4513         case CONVERT_PALETTED_CK:
4514         {
4515             BYTE table[256][4];
4516             unsigned int x, y;
4517
4518             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4519
4520             for (y = 0; y < height; y++)
4521             {
4522                 source = src + pitch * y;
4523                 dest = dst + outpitch * y;
4524                 /* This is an 1 bpp format, using the width here is fine */
4525                 for (x = 0; x < width; x++) {
4526                     BYTE color = *source++;
4527                     *dest++ = table[color][0];
4528                     *dest++ = table[color][1];
4529                     *dest++ = table[color][2];
4530                     *dest++ = table[color][3];
4531                 }
4532             }
4533         }
4534         break;
4535
4536         case CONVERT_CK_565:
4537         {
4538             /* Converting the 565 format in 5551 packed to emulate color-keying.
4539
4540               Note : in all these conversion, it would be best to average the averaging
4541                       pixels to get the color of the pixel that will be color-keyed to
4542                       prevent 'color bleeding'. This will be done later on if ever it is
4543                       too visible.
4544
4545               Note2: Nvidia documents say that their driver does not support alpha + color keying
4546                      on the same surface and disables color keying in such a case
4547             */
4548             unsigned int x, y;
4549             const WORD *Source;
4550             WORD *Dest;
4551
4552             TRACE("Color keyed 565\n");
4553
4554             for (y = 0; y < height; y++) {
4555                 Source = (const WORD *)(src + y * pitch);
4556                 Dest = (WORD *) (dst + y * outpitch);
4557                 for (x = 0; x < width; x++ ) {
4558                     WORD color = *Source++;
4559                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4560                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4561                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4562                         *Dest |= 0x0001;
4563                     Dest++;
4564                 }
4565             }
4566         }
4567         break;
4568
4569         case CONVERT_CK_5551:
4570         {
4571             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4572             unsigned int x, y;
4573             const WORD *Source;
4574             WORD *Dest;
4575             TRACE("Color keyed 5551\n");
4576             for (y = 0; y < height; y++) {
4577                 Source = (const WORD *)(src + y * pitch);
4578                 Dest = (WORD *) (dst + y * outpitch);
4579                 for (x = 0; x < width; x++ ) {
4580                     WORD color = *Source++;
4581                     *Dest = color;
4582                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4583                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4584                         *Dest |= (1 << 15);
4585                     else
4586                         *Dest &= ~(1 << 15);
4587                     Dest++;
4588                 }
4589             }
4590         }
4591         break;
4592
4593         case CONVERT_CK_RGB24:
4594         {
4595             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4596             unsigned int x, y;
4597             for (y = 0; y < height; y++)
4598             {
4599                 source = src + pitch * y;
4600                 dest = dst + outpitch * y;
4601                 for (x = 0; x < width; x++) {
4602                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4603                     DWORD dstcolor = color << 8;
4604                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4605                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4606                         dstcolor |= 0xff;
4607                     *(DWORD*)dest = dstcolor;
4608                     source += 3;
4609                     dest += 4;
4610                 }
4611             }
4612         }
4613         break;
4614
4615         case CONVERT_RGB32_888:
4616         {
4617             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4618             unsigned int x, y;
4619             for (y = 0; y < height; y++)
4620             {
4621                 source = src + pitch * y;
4622                 dest = dst + outpitch * y;
4623                 for (x = 0; x < width; x++) {
4624                     DWORD color = 0xffffff & *(const DWORD*)source;
4625                     DWORD dstcolor = color << 8;
4626                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4627                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4628                         dstcolor |= 0xff;
4629                     *(DWORD*)dest = dstcolor;
4630                     source += 4;
4631                     dest += 4;
4632                 }
4633             }
4634         }
4635         break;
4636
4637         default:
4638             ERR("Unsupported conversion type %#x.\n", convert);
4639     }
4640     return WINED3D_OK;
4641 }
4642
4643 BOOL palette9_changed(struct wined3d_surface *surface)
4644 {
4645     struct wined3d_device *device = surface->resource.device;
4646
4647     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4648             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4649     {
4650         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4651          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4652          */
4653         return FALSE;
4654     }
4655
4656     if (surface->palette9)
4657     {
4658         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4659         {
4660             return FALSE;
4661         }
4662     }
4663     else
4664     {
4665         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4666     }
4667     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4668
4669     return TRUE;
4670 }
4671
4672 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4673 {
4674     /* Flip the surface contents */
4675     /* Flip the DC */
4676     {
4677         HDC tmp;
4678         tmp = front->hDC;
4679         front->hDC = back->hDC;
4680         back->hDC = tmp;
4681     }
4682
4683     /* Flip the DIBsection */
4684     {
4685         HBITMAP tmp;
4686         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4687         tmp = front->dib.DIBsection;
4688         front->dib.DIBsection = back->dib.DIBsection;
4689         back->dib.DIBsection = tmp;
4690
4691         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4692         else front->flags &= ~SFLAG_DIBSECTION;
4693         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4694         else back->flags &= ~SFLAG_DIBSECTION;
4695     }
4696
4697     /* Flip the surface data */
4698     {
4699         void* tmp;
4700
4701         tmp = front->dib.bitmap_data;
4702         front->dib.bitmap_data = back->dib.bitmap_data;
4703         back->dib.bitmap_data = tmp;
4704
4705         tmp = front->resource.allocatedMemory;
4706         front->resource.allocatedMemory = back->resource.allocatedMemory;
4707         back->resource.allocatedMemory = tmp;
4708
4709         tmp = front->resource.heapMemory;
4710         front->resource.heapMemory = back->resource.heapMemory;
4711         back->resource.heapMemory = tmp;
4712     }
4713
4714     /* Flip the PBO */
4715     {
4716         GLuint tmp_pbo = front->pbo;
4717         front->pbo = back->pbo;
4718         back->pbo = tmp_pbo;
4719     }
4720
4721     /* client_memory should not be different, but just in case */
4722     {
4723         BOOL tmp;
4724         tmp = front->dib.client_memory;
4725         front->dib.client_memory = back->dib.client_memory;
4726         back->dib.client_memory = tmp;
4727     }
4728
4729     /* Flip the opengl texture */
4730     {
4731         GLuint tmp;
4732
4733         tmp = back->texture_name;
4734         back->texture_name = front->texture_name;
4735         front->texture_name = tmp;
4736
4737         tmp = back->texture_name_srgb;
4738         back->texture_name_srgb = front->texture_name_srgb;
4739         front->texture_name_srgb = tmp;
4740
4741         tmp = back->rb_multisample;
4742         back->rb_multisample = front->rb_multisample;
4743         front->rb_multisample = tmp;
4744
4745         tmp = back->rb_resolved;
4746         back->rb_resolved = front->rb_resolved;
4747         front->rb_resolved = tmp;
4748
4749         resource_unload(&back->resource);
4750         resource_unload(&front->resource);
4751     }
4752
4753     {
4754         DWORD tmp_flags = back->flags;
4755         back->flags = front->flags;
4756         front->flags = tmp_flags;
4757     }
4758 }
4759
4760 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4761  * pixel copy calls. */
4762 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4763         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4764 {
4765     struct wined3d_device *device = dst_surface->resource.device;
4766     float xrel, yrel;
4767     UINT row;
4768     struct wined3d_context *context;
4769     BOOL upsidedown = FALSE;
4770     RECT dst_rect = *dst_rect_in;
4771
4772     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4773      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4774      */
4775     if(dst_rect.top > dst_rect.bottom) {
4776         UINT tmp = dst_rect.bottom;
4777         dst_rect.bottom = dst_rect.top;
4778         dst_rect.top = tmp;
4779         upsidedown = TRUE;
4780     }
4781
4782     context = context_acquire(device, src_surface);
4783     context_apply_blit_state(context, device);
4784     surface_internal_preload(dst_surface, SRGB_RGB);
4785     ENTER_GL();
4786
4787     /* Bind the target texture */
4788     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4789     if (surface_is_offscreen(src_surface))
4790     {
4791         TRACE("Reading from an offscreen target\n");
4792         upsidedown = !upsidedown;
4793         glReadBuffer(device->offscreenBuffer);
4794     }
4795     else
4796     {
4797         glReadBuffer(surface_get_gl_buffer(src_surface));
4798     }
4799     checkGLcall("glReadBuffer");
4800
4801     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4802     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4803
4804     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4805     {
4806         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4807
4808         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4809             ERR("Texture filtering not supported in direct blit\n");
4810         }
4811     }
4812     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4813             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4814     {
4815         ERR("Texture filtering not supported in direct blit\n");
4816     }
4817
4818     if (upsidedown
4819             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4820             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4821     {
4822         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4823
4824         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4825                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4826                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4827                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4828     }
4829     else
4830     {
4831         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4832         /* I have to process this row by row to swap the image,
4833          * otherwise it would be upside down, so stretching in y direction
4834          * doesn't cost extra time
4835          *
4836          * However, stretching in x direction can be avoided if not necessary
4837          */
4838         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4839             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4840             {
4841                 /* Well, that stuff works, but it's very slow.
4842                  * find a better way instead
4843                  */
4844                 UINT col;
4845
4846                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4847                 {
4848                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4849                             dst_rect.left + col /* x offset */, row /* y offset */,
4850                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4851                 }
4852             }
4853             else
4854             {
4855                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4856                         dst_rect.left /* x offset */, row /* y offset */,
4857                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4858             }
4859         }
4860     }
4861     checkGLcall("glCopyTexSubImage2D");
4862
4863     LEAVE_GL();
4864     context_release(context);
4865
4866     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4867      * path is never entered
4868      */
4869     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4870 }
4871
4872 /* Uses the hardware to stretch and flip the image */
4873 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4874         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4875 {
4876     struct wined3d_device *device = dst_surface->resource.device;
4877     struct wined3d_swapchain *src_swapchain = NULL;
4878     GLuint src, backup = 0;
4879     float left, right, top, bottom; /* Texture coordinates */
4880     UINT fbwidth = src_surface->resource.width;
4881     UINT fbheight = src_surface->resource.height;
4882     struct wined3d_context *context;
4883     GLenum drawBuffer = GL_BACK;
4884     GLenum texture_target;
4885     BOOL noBackBufferBackup;
4886     BOOL src_offscreen;
4887     BOOL upsidedown = FALSE;
4888     RECT dst_rect = *dst_rect_in;
4889
4890     TRACE("Using hwstretch blit\n");
4891     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4892     context = context_acquire(device, src_surface);
4893     context_apply_blit_state(context, device);
4894     surface_internal_preload(dst_surface, SRGB_RGB);
4895
4896     src_offscreen = surface_is_offscreen(src_surface);
4897     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4898     if (!noBackBufferBackup && !src_surface->texture_name)
4899     {
4900         /* Get it a description */
4901         surface_internal_preload(src_surface, SRGB_RGB);
4902     }
4903     ENTER_GL();
4904
4905     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4906      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4907      */
4908     if (context->aux_buffers >= 2)
4909     {
4910         /* Got more than one aux buffer? Use the 2nd aux buffer */
4911         drawBuffer = GL_AUX1;
4912     }
4913     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4914     {
4915         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4916         drawBuffer = GL_AUX0;
4917     }
4918
4919     if(noBackBufferBackup) {
4920         glGenTextures(1, &backup);
4921         checkGLcall("glGenTextures");
4922         context_bind_texture(context, GL_TEXTURE_2D, backup);
4923         texture_target = GL_TEXTURE_2D;
4924     } else {
4925         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4926          * we are reading from the back buffer, the backup can be used as source texture
4927          */
4928         texture_target = src_surface->texture_target;
4929         context_bind_texture(context, texture_target, src_surface->texture_name);
4930         glEnable(texture_target);
4931         checkGLcall("glEnable(texture_target)");
4932
4933         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4934         src_surface->flags &= ~SFLAG_INTEXTURE;
4935     }
4936
4937     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4938      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4939      */
4940     if(dst_rect.top > dst_rect.bottom) {
4941         UINT tmp = dst_rect.bottom;
4942         dst_rect.bottom = dst_rect.top;
4943         dst_rect.top = tmp;
4944         upsidedown = TRUE;
4945     }
4946
4947     if (src_offscreen)
4948     {
4949         TRACE("Reading from an offscreen target\n");
4950         upsidedown = !upsidedown;
4951         glReadBuffer(device->offscreenBuffer);
4952     }
4953     else
4954     {
4955         glReadBuffer(surface_get_gl_buffer(src_surface));
4956     }
4957
4958     /* TODO: Only back up the part that will be overwritten */
4959     glCopyTexSubImage2D(texture_target, 0,
4960                         0, 0 /* read offsets */,
4961                         0, 0,
4962                         fbwidth,
4963                         fbheight);
4964
4965     checkGLcall("glCopyTexSubImage2D");
4966
4967     /* No issue with overriding these - the sampler is dirty due to blit usage */
4968     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4969             wined3d_gl_mag_filter(magLookup, Filter));
4970     checkGLcall("glTexParameteri");
4971     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
4972             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
4973     checkGLcall("glTexParameteri");
4974
4975     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4976         src_swapchain = src_surface->container.u.swapchain;
4977     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
4978     {
4979         src = backup ? backup : src_surface->texture_name;
4980     }
4981     else
4982     {
4983         glReadBuffer(GL_FRONT);
4984         checkGLcall("glReadBuffer(GL_FRONT)");
4985
4986         glGenTextures(1, &src);
4987         checkGLcall("glGenTextures(1, &src)");
4988         context_bind_texture(context, GL_TEXTURE_2D, src);
4989
4990         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
4991          * out for power of 2 sizes
4992          */
4993         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
4994                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
4995         checkGLcall("glTexImage2D");
4996         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
4997                             0, 0 /* read offsets */,
4998                             0, 0,
4999                             fbwidth,
5000                             fbheight);
5001
5002         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5003         checkGLcall("glTexParameteri");
5004         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5005         checkGLcall("glTexParameteri");
5006
5007         glReadBuffer(GL_BACK);
5008         checkGLcall("glReadBuffer(GL_BACK)");
5009
5010         if(texture_target != GL_TEXTURE_2D) {
5011             glDisable(texture_target);
5012             glEnable(GL_TEXTURE_2D);
5013             texture_target = GL_TEXTURE_2D;
5014         }
5015     }
5016     checkGLcall("glEnd and previous");
5017
5018     left = src_rect->left;
5019     right = src_rect->right;
5020
5021     if (!upsidedown)
5022     {
5023         top = src_surface->resource.height - src_rect->top;
5024         bottom = src_surface->resource.height - src_rect->bottom;
5025     }
5026     else
5027     {
5028         top = src_surface->resource.height - src_rect->bottom;
5029         bottom = src_surface->resource.height - src_rect->top;
5030     }
5031
5032     if (src_surface->flags & SFLAG_NORMCOORD)
5033     {
5034         left /= src_surface->pow2Width;
5035         right /= src_surface->pow2Width;
5036         top /= src_surface->pow2Height;
5037         bottom /= src_surface->pow2Height;
5038     }
5039
5040     /* draw the source texture stretched and upside down. The correct surface is bound already */
5041     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5042     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5043
5044     context_set_draw_buffer(context, drawBuffer);
5045     glReadBuffer(drawBuffer);
5046
5047     glBegin(GL_QUADS);
5048         /* bottom left */
5049         glTexCoord2f(left, bottom);
5050         glVertex2i(0, 0);
5051
5052         /* top left */
5053         glTexCoord2f(left, top);
5054         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5055
5056         /* top right */
5057         glTexCoord2f(right, top);
5058         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5059
5060         /* bottom right */
5061         glTexCoord2f(right, bottom);
5062         glVertex2i(dst_rect.right - dst_rect.left, 0);
5063     glEnd();
5064     checkGLcall("glEnd and previous");
5065
5066     if (texture_target != dst_surface->texture_target)
5067     {
5068         glDisable(texture_target);
5069         glEnable(dst_surface->texture_target);
5070         texture_target = dst_surface->texture_target;
5071     }
5072
5073     /* Now read the stretched and upside down image into the destination texture */
5074     context_bind_texture(context, texture_target, dst_surface->texture_name);
5075     glCopyTexSubImage2D(texture_target,
5076                         0,
5077                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5078                         0, 0, /* We blitted the image to the origin */
5079                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5080     checkGLcall("glCopyTexSubImage2D");
5081
5082     if(drawBuffer == GL_BACK) {
5083         /* Write the back buffer backup back */
5084         if(backup) {
5085             if(texture_target != GL_TEXTURE_2D) {
5086                 glDisable(texture_target);
5087                 glEnable(GL_TEXTURE_2D);
5088                 texture_target = GL_TEXTURE_2D;
5089             }
5090             context_bind_texture(context, GL_TEXTURE_2D, backup);
5091         }
5092         else
5093         {
5094             if (texture_target != src_surface->texture_target)
5095             {
5096                 glDisable(texture_target);
5097                 glEnable(src_surface->texture_target);
5098                 texture_target = src_surface->texture_target;
5099             }
5100             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5101         }
5102
5103         glBegin(GL_QUADS);
5104             /* top left */
5105             glTexCoord2f(0.0f, 0.0f);
5106             glVertex2i(0, fbheight);
5107
5108             /* bottom left */
5109             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5110             glVertex2i(0, 0);
5111
5112             /* bottom right */
5113             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5114                     (float)fbheight / (float)src_surface->pow2Height);
5115             glVertex2i(fbwidth, 0);
5116
5117             /* top right */
5118             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5119             glVertex2i(fbwidth, fbheight);
5120         glEnd();
5121     }
5122     glDisable(texture_target);
5123     checkGLcall("glDisable(texture_target)");
5124
5125     /* Cleanup */
5126     if (src != src_surface->texture_name && src != backup)
5127     {
5128         glDeleteTextures(1, &src);
5129         checkGLcall("glDeleteTextures(1, &src)");
5130     }
5131     if(backup) {
5132         glDeleteTextures(1, &backup);
5133         checkGLcall("glDeleteTextures(1, &backup)");
5134     }
5135
5136     LEAVE_GL();
5137
5138     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5139
5140     context_release(context);
5141
5142     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5143      * path is never entered
5144      */
5145     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5146 }
5147
5148 /* Front buffer coordinates are always full screen coordinates, but our GL
5149  * drawable is limited to the window's client area. The sysmem and texture
5150  * copies do have the full screen size. Note that GL has a bottom-left
5151  * origin, while D3D has a top-left origin. */
5152 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5153 {
5154     UINT drawable_height;
5155
5156     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5157             && surface == surface->container.u.swapchain->front_buffer)
5158     {
5159         POINT offset = {0, 0};
5160         RECT windowsize;
5161
5162         ScreenToClient(window, &offset);
5163         OffsetRect(rect, offset.x, offset.y);
5164
5165         GetClientRect(window, &windowsize);
5166         drawable_height = windowsize.bottom - windowsize.top;
5167     }
5168     else
5169     {
5170         drawable_height = surface->resource.height;
5171     }
5172
5173     rect->top = drawable_height - rect->top;
5174     rect->bottom = drawable_height - rect->bottom;
5175 }
5176
5177 static void surface_blt_to_drawable(struct wined3d_device *device,
5178         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5179         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5180         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5181 {
5182     struct wined3d_context *context;
5183     RECT src_rect, dst_rect;
5184
5185     src_rect = *src_rect_in;
5186     dst_rect = *dst_rect_in;
5187
5188     /* Make sure the surface is up-to-date. This should probably use
5189      * surface_load_location() and worry about the destination surface too,
5190      * unless we're overwriting it completely. */
5191     surface_internal_preload(src_surface, SRGB_RGB);
5192
5193     /* Activate the destination context, set it up for blitting */
5194     context = context_acquire(device, dst_surface);
5195     context_apply_blit_state(context, device);
5196
5197     if (!surface_is_offscreen(dst_surface))
5198         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5199
5200     device->blitter->set_shader(device->blit_priv, context, src_surface);
5201
5202     ENTER_GL();
5203
5204     if (color_key)
5205     {
5206         glEnable(GL_ALPHA_TEST);
5207         checkGLcall("glEnable(GL_ALPHA_TEST)");
5208
5209         /* When the primary render target uses P8, the alpha component
5210          * contains the palette index. Which means that the colorkey is one of
5211          * the palette entries. In other cases pixels that should be masked
5212          * away have alpha set to 0. */
5213         if (primary_render_target_is_p8(device))
5214             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5215         else
5216             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5217         checkGLcall("glAlphaFunc");
5218     }
5219     else
5220     {
5221         glDisable(GL_ALPHA_TEST);
5222         checkGLcall("glDisable(GL_ALPHA_TEST)");
5223     }
5224
5225     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5226
5227     if (color_key)
5228     {
5229         glDisable(GL_ALPHA_TEST);
5230         checkGLcall("glDisable(GL_ALPHA_TEST)");
5231     }
5232
5233     LEAVE_GL();
5234
5235     /* Leave the opengl state valid for blitting */
5236     device->blitter->unset_shader(context->gl_info);
5237
5238     if (wined3d_settings.strict_draw_ordering
5239             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5240             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5241         wglFlush(); /* Flush to ensure ordering across contexts. */
5242
5243     context_release(context);
5244 }
5245
5246 /* Do not call while under the GL lock. */
5247 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5248 {
5249     struct wined3d_device *device = s->resource.device;
5250     const struct blit_shader *blitter;
5251
5252     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5253             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5254     if (!blitter)
5255     {
5256         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5257         return WINED3DERR_INVALIDCALL;
5258     }
5259
5260     return blitter->color_fill(device, s, rect, color);
5261 }
5262
5263 /* Do not call while under the GL lock. */
5264 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5265         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5266         WINED3DTEXTUREFILTERTYPE Filter)
5267 {
5268     struct wined3d_device *device = dst_surface->resource.device;
5269     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5270     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5271
5272     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5273             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5274             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5275
5276     /* Get the swapchain. One of the surfaces has to be a primary surface */
5277     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5278     {
5279         WARN("Destination is in sysmem, rejecting gl blt\n");
5280         return WINED3DERR_INVALIDCALL;
5281     }
5282
5283     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5284         dstSwapchain = dst_surface->container.u.swapchain;
5285
5286     if (src_surface)
5287     {
5288         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5289         {
5290             WARN("Src is in sysmem, rejecting gl blt\n");
5291             return WINED3DERR_INVALIDCALL;
5292         }
5293
5294         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5295             srcSwapchain = src_surface->container.u.swapchain;
5296     }
5297
5298     /* Early sort out of cases where no render target is used */
5299     if (!dstSwapchain && !srcSwapchain
5300             && src_surface != device->fb.render_targets[0]
5301             && dst_surface != device->fb.render_targets[0])
5302     {
5303         TRACE("No surface is render target, not using hardware blit.\n");
5304         return WINED3DERR_INVALIDCALL;
5305     }
5306
5307     /* No destination color keying supported */
5308     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5309     {
5310         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5311         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5312         return WINED3DERR_INVALIDCALL;
5313     }
5314
5315     /* The only case where both surfaces on a swapchain are supported is a
5316      * back buffer -> front buffer blit on the same swapchain. */
5317     if (dstSwapchain && dstSwapchain->back_buffers
5318             && dst_surface == dstSwapchain->front_buffer
5319             && src_surface == dstSwapchain->back_buffers[0])
5320     {
5321         /* Half-Life does a Blt from the back buffer to the front buffer,
5322          * Full surface size, no flags... Use present instead
5323          *
5324          * This path will only be entered for d3d7 and ddraw apps, because d3d8/9 offer no way to blit TO the front buffer
5325          */
5326
5327         /* Check rects - wined3d_swapchain_present() doesn't handle them. */
5328         for (;;)
5329         {
5330             TRACE("Looking if a Present can be done...\n");
5331             /* Source Rectangle must be full surface */
5332             if (src_rect->left || src_rect->top
5333                     || src_rect->right != src_surface->resource.width
5334                     || src_rect->bottom != src_surface->resource.height)
5335             {
5336                 TRACE("No, Source rectangle doesn't match\n");
5337                 break;
5338             }
5339
5340             /* No stretching may occur */
5341             if (src_rect->right != dst_rect->right - dst_rect->left
5342                     || src_rect->bottom != dst_rect->bottom - dst_rect->top)
5343             {
5344                 TRACE("No, stretching is done\n");
5345                 break;
5346             }
5347
5348             /* Destination must be full surface or match the clipping rectangle */
5349             if (dst_surface->clipper && dst_surface->clipper->hWnd)
5350             {
5351                 RECT cliprect;
5352                 POINT pos[2];
5353                 GetClientRect(dst_surface->clipper->hWnd, &cliprect);
5354                 pos[0].x = dst_rect->left;
5355                 pos[0].y = dst_rect->top;
5356                 pos[1].x = dst_rect->right;
5357                 pos[1].y = dst_rect->bottom;
5358                 MapWindowPoints(GetDesktopWindow(), dst_surface->clipper->hWnd, pos, 2);
5359
5360                 if (pos[0].x != cliprect.left || pos[0].y != cliprect.top
5361                         || pos[1].x != cliprect.right || pos[1].y != cliprect.bottom)
5362                 {
5363                     TRACE("No, dest rectangle doesn't match(clipper)\n");
5364                     TRACE("Clip rect at %s\n", wine_dbgstr_rect(&cliprect));
5365                     TRACE("Blt dest: %s\n", wine_dbgstr_rect(dst_rect));
5366                     break;
5367                 }
5368             }
5369             else if (dst_rect->left || dst_rect->top
5370                     || dst_rect->right != dst_surface->resource.width
5371                     || dst_rect->bottom != dst_surface->resource.height)
5372             {
5373                 TRACE("No, dest rectangle doesn't match(surface size)\n");
5374                 break;
5375             }
5376
5377             TRACE("Yes\n");
5378
5379             /* These flags are unimportant for the flag check, remove them */
5380             if (!(flags & ~(WINEDDBLT_DONOTWAIT | WINEDDBLT_WAIT)))
5381             {
5382                 WINED3DSWAPEFFECT orig_swap = dstSwapchain->presentParms.SwapEffect;
5383
5384                 /* The idea behind this is that a glReadPixels and a glDrawPixels call
5385                     * take very long, while a flip is fast.
5386                     * This applies to Half-Life, which does such Blts every time it finished
5387                     * a frame, and to Prince of Persia 3D, which uses this to draw at least the main
5388                     * menu. This is also used by all apps when they do windowed rendering
5389                     *
5390                     * The problem is that flipping is not really the same as copying. After a
5391                     * Blt the front buffer is a copy of the back buffer, and the back buffer is
5392                     * untouched. Therefore it's necessary to override the swap effect
5393                     * and to set it back after the flip.
5394                     *
5395                     * Windowed Direct3D < 7 apps do the same. The D3D7 sdk demos are nice
5396                     * testcases.
5397                     */
5398
5399                 dstSwapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
5400                 dstSwapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
5401
5402                 TRACE("Full screen back buffer -> front buffer blt, performing a flip instead.\n");
5403                 wined3d_swapchain_present(dstSwapchain, NULL, NULL, dstSwapchain->win_handle, NULL, 0);
5404
5405                 dstSwapchain->presentParms.SwapEffect = orig_swap;
5406
5407                 return WINED3D_OK;
5408             }
5409             break;
5410         }
5411
5412         TRACE("Unsupported blit between buffers on the same swapchain\n");
5413         return WINED3DERR_INVALIDCALL;
5414     } else if(dstSwapchain && dstSwapchain == srcSwapchain) {
5415         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5416         return WINED3DERR_INVALIDCALL;
5417     } else if(dstSwapchain && srcSwapchain) {
5418         FIXME("Implement hardware blit between two different swapchains\n");
5419         return WINED3DERR_INVALIDCALL;
5420     }
5421     else if (dstSwapchain)
5422     {
5423         /* Handled with regular texture -> swapchain blit */
5424         if (src_surface == device->fb.render_targets[0])
5425             TRACE("Blit from active render target to a swapchain\n");
5426     }
5427     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5428     {
5429         FIXME("Implement blit from a swapchain to the active render target\n");
5430         return WINED3DERR_INVALIDCALL;
5431     }
5432
5433     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5434     {
5435         /* Blit from render target to texture */
5436         BOOL stretchx;
5437
5438         /* P8 read back is not implemented */
5439         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5440                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5441         {
5442             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5443             return WINED3DERR_INVALIDCALL;
5444         }
5445
5446         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5447         {
5448             TRACE("Color keying not supported by frame buffer to texture blit\n");
5449             return WINED3DERR_INVALIDCALL;
5450             /* Destination color key is checked above */
5451         }
5452
5453         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5454             stretchx = TRUE;
5455         else
5456             stretchx = FALSE;
5457
5458         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5459          * flip the image nor scale it.
5460          *
5461          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5462          * -> If the app wants a image width an unscaled width, copy it line per line
5463          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5464          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5465          *    back buffer. This is slower than reading line per line, thus not used for flipping
5466          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5467          *    pixel by pixel. */
5468         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5469                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5470         {
5471             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5472             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5473         } else {
5474             TRACE("Using hardware stretching to flip / stretch the texture\n");
5475             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5476         }
5477
5478         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5479         {
5480             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5481             dst_surface->resource.allocatedMemory = NULL;
5482             dst_surface->resource.heapMemory = NULL;
5483         }
5484         else
5485         {
5486             dst_surface->flags &= ~SFLAG_INSYSMEM;
5487         }
5488
5489         return WINED3D_OK;
5490     }
5491     else if (src_surface)
5492     {
5493         /* Blit from offscreen surface to render target */
5494         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5495         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5496
5497         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5498
5499         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5500                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5501                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5502         {
5503             FIXME("Unsupported blit operation falling back to software\n");
5504             return WINED3DERR_INVALIDCALL;
5505         }
5506
5507         /* Color keying: Check if we have to do a color keyed blt,
5508          * and if not check if a color key is activated.
5509          *
5510          * Just modify the color keying parameters in the surface and restore them afterwards
5511          * The surface keeps track of the color key last used to load the opengl surface.
5512          * PreLoad will catch the change to the flags and color key and reload if necessary.
5513          */
5514         if (flags & WINEDDBLT_KEYSRC)
5515         {
5516             /* Use color key from surface */
5517         }
5518         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5519         {
5520             /* Use color key from DDBltFx */
5521             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5522             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5523         }
5524         else
5525         {
5526             /* Do not use color key */
5527             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5528         }
5529
5530         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5531                 src_surface, src_rect, dst_surface, dst_rect);
5532
5533         /* Restore the color key parameters */
5534         src_surface->CKeyFlags = oldCKeyFlags;
5535         src_surface->SrcBltCKey = oldBltCKey;
5536
5537         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5538
5539         return WINED3D_OK;
5540     }
5541
5542     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5543     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5544     return WINED3DERR_INVALIDCALL;
5545 }
5546
/* Draw the contents of a depth texture into the depth buffer of whatever
 * framebuffer the caller has bound.
 *
 * A triangle strip spanning -1..1 in both directions is drawn into the
 * viewport (x, y, w, h) with "texture" bound, depth writes enabled, the depth
 * function set to GL_ALWAYS and all color channels masked off. The depth blt
 * shader is selected and deselected through the device's shader backend.
 *
 * The enable, depth buffer, color buffer and viewport attribute groups are
 * saved with glPushAttrib() and restored with glPopAttrib(); the previous
 * texture binding and the texture's compare mode are restored as well. */
5547 /* GL locking is done by the caller */
5548 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5549         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5550 {
5551     struct wined3d_device *device = surface->resource.device;
5552     const struct wined3d_gl_info *gl_info = context->gl_info;
5553     GLint compare_mode = GL_NONE;
5554     struct blt_info info;
5555     GLint old_binding = 0;
5556     RECT rect;
5557
         /* Save every attribute group that is modified below. */
5558     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5559
         /* Write depth unconditionally and leave the color buffer untouched. */
5560     glDisable(GL_CULL_FACE);
5561     glDisable(GL_BLEND);
5562     glDisable(GL_ALPHA_TEST);
5563     glDisable(GL_SCISSOR_TEST);
5564     glDisable(GL_STENCIL_TEST);
5565     glEnable(GL_DEPTH_TEST);
5566     glDepthFunc(GL_ALWAYS);
5567     glDepthMask(GL_TRUE);
5568     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5569     glViewport(x, y, w, h);
5570
         /* Note the rectangle is built with top = h and bottom = 0.
          * NOTE(review): presumably this produces the vertical flip that
          * surface_load_ds_location() mentions - confirm against
          * surface_get_blt_info(). */
5571     SetRect(&rect, 0, h, w, 0);
5572     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5573     context_active_texture(context, context->gl_info, 0);
         /* Remember the current binding so it can be restored afterwards. */
5574     glGetIntegerv(info.binding, &old_binding);
5575     glBindTexture(info.bind_target, texture);
5576     if (gl_info->supported[ARB_SHADOW])
5577     {
             /* Temporarily switch the texture's compare mode to GL_NONE if it
              * is set to anything else; it is put back after drawing. */
5578         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5579         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5580     }
5581
5582     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5583             gl_info, info.tex_type, &surface->ds_current_size);
5584
         /* One texture coordinate per corner, as computed by
          * surface_get_blt_info(). */
5585     glBegin(GL_TRIANGLE_STRIP);
5586     glTexCoord3fv(info.coords[0]);
5587     glVertex2f(-1.0f, -1.0f);
5588     glTexCoord3fv(info.coords[1]);
5589     glVertex2f(1.0f, -1.0f);
5590     glTexCoord3fv(info.coords[2]);
5591     glVertex2f(-1.0f, 1.0f);
5592     glTexCoord3fv(info.coords[3]);
5593     glVertex2f(1.0f, 1.0f);
5594     glEnd();
5595
         /* Undo the texture state changes made above. */
5596     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5597     glBindTexture(info.bind_target, old_binding);
5598
5599     glPopAttrib();
5600
5601     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5602 }
5603
/* Make "location" the only depth/stencil location flagged on the surface and
 * store w and h in ds_current_size.
 *
 * A location with bits outside SFLAG_DS_LOCATIONS only triggers a FIXME; the
 * flags and the size are updated regardless. */
5604 void surface_modify_ds_location(struct wined3d_surface *surface,
5605         DWORD location, UINT w, UINT h)
5606 {
5607     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5608
5609     if (location & ~SFLAG_DS_LOCATIONS)
5610         FIXME("Invalid location (%#x) specified.\n", location);
5611
5612     surface->ds_current_size.cx = w;
5613     surface->ds_current_size.cy = h;
         /* Clear all depth/stencil location bits before setting the new one. */
5614     surface->flags &= ~SFLAG_DS_LOCATIONS;
5615     surface->flags |= location;
5616 }
5617
/* Bring the depth/stencil contents of the surface up to date in "location".
 *
 * "location" is expected to be SFLAG_DS_OFFSCREEN (copy the onscreen depth
 * buffer into the surface's depth texture) or SFLAG_DS_ONSCREEN (draw the
 * depth texture into the onscreen depth buffer). Any other value is reported
 * with ERR() but is still flagged as current at the end of the function.
 *
 * Nothing is done unless the offscreen rendering mode is ORM_FBO. On the
 * paths that reach the end of the function, "location" is added to the
 * surface flags and ds_current_size is set to the full resource size. */
5618 /* Context activation is done by the caller. */
5619 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5620 {
5621     struct wined3d_device *device = surface->resource.device;
5622     GLsizei w, h;
5623
5624     TRACE("surface %p, new location %#x.\n", surface, location);
5625
5626     /* TODO: Make this work for modes other than FBO */
5627     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5628
         /* If the requested location is not flagged yet, the area to copy is
          * the recorded ds_current_size, which is then reset to zero.
          * Otherwise the full resource size is used. */
5629     if (!(surface->flags & location))
5630     {
5631         w = surface->ds_current_size.cx;
5632         h = surface->ds_current_size.cy;
5633         surface->ds_current_size.cx = 0;
5634         surface->ds_current_size.cy = 0;
5635     }
5636     else
5637     {
5638         w = surface->resource.width;
5639         h = surface->resource.height;
5640     }
5641
         /* ds_current_size was zeroed above when the location was not
          * flagged, so for a non-empty surface this only matches when the
          * location was already flagged and covers the whole surface. */
5642     if (surface->ds_current_size.cx == surface->resource.width
5643             && surface->ds_current_size.cy == surface->resource.height)
5644     {
5645         TRACE("Location (%#x) is already up to date.\n", location);
5646         return;
5647     }
5648
5649     if (surface->current_renderbuffer)
5650     {
5651         FIXME("Not supported with fixed up depth stencil.\n");
5652         return;
5653     }
5654
5655     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5656     {
5657         /* This mostly happens when a depth / stencil is used without being
5658          * cleared first. In principle we could upload from sysmem, or
5659          * explicitly clear before first usage. For the moment there don't
5660          * appear to be a lot of applications depending on this, so a FIXME
5661          * should do. */
5662         FIXME("No up to date depth stencil location.\n");
             /* Nothing to copy from: just declare the location current. */
5663         surface->flags |= location;
5664         surface->ds_current_size.cx = surface->resource.width;
5665         surface->ds_current_size.cy = surface->resource.height;
5666         return;
5667     }
5668
5669     if (location == SFLAG_DS_OFFSCREEN)
5670     {
5671         GLint old_binding = 0;
5672         GLenum bind_target;
5673
5674         /* The render target is allowed to be smaller than the depth/stencil
5675          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5676          * than the offscreen surface. Don't overwrite the offscreen surface
5677          * with undefined data. */
5678         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5679         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5680
5681         TRACE("Copying onscreen depth buffer to depth texture.\n");
5682
5683         ENTER_GL();
5684
             /* The intermediate texture is created once per device and kept
              * in device->depth_blt_texture. */
5685         if (!device->depth_blt_texture)
5686         {
5687             glGenTextures(1, &device->depth_blt_texture);
5688         }
5689
5690         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5691          * directly on the FBO texture. That's because we need to flip. */
5692         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5693                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
             /* Use the same texture target as the surface for the
              * intermediate texture and remember that target's binding. */
5694         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5695         {
5696             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5697             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5698         }
5699         else
5700         {
5701             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5702             bind_target = GL_TEXTURE_2D;
5703         }
5704         glBindTexture(bind_target, device->depth_blt_texture);
5705         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5706          * internal format, because the internal format might include stencil
5707          * data. In principle we should copy stencil data as well, but unless
5708          * the driver supports stencil export it's hard to do, and doesn't
5709          * seem to be needed in practice. If the hardware doesn't support
5710          * writing stencil data, the glCopyTexImage2D() call might trigger
5711          * software fallbacks. */
5712         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5713         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5714         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5715         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5716         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5717         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5718         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5719         glBindTexture(bind_target, old_binding);
5720
             /* Switch to an FBO setup with the surface as depth/stencil and
              * no color target, with draw and read buffers set to GL_NONE. */
5721         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5722                 NULL, surface, SFLAG_INTEXTURE);
5723         context_set_draw_buffer(context, GL_NONE);
5724         glReadBuffer(GL_NONE);
5725
5726         /* Do the actual blit */
5727         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5728         checkGLcall("depth_blt");
5729
             /* The framebuffer binding was changed behind the state tracker's
              * back, so mark it for re-application. */
5730         context_invalidate_state(context, STATE_FRAMEBUFFER);
5731
5732         LEAVE_GL();
5733
5734         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5735     }
5736     else if (location == SFLAG_DS_ONSCREEN)
5737     {
5738         TRACE("Copying depth texture to onscreen depth buffer.\n");
5739
5740         ENTER_GL();
5741
5742         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5743                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
             /* The viewport's y origin is pow2Height - h.
              * NOTE(review): presumably this accounts for the flipped
              * orientation of the depth texture - confirm. */
5744         surface_depth_blt(surface, context, surface->texture_name,
5745                 0, surface->pow2Height - h, w, h, surface->texture_target);
5746         checkGLcall("depth_blt");
5747
5748         context_invalidate_state(context, STATE_FRAMEBUFFER);
5749
5750         LEAVE_GL();
5751
5752         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5753     }
5754     else
5755     {
5756         ERR("Invalid location (%#x) specified.\n", location);
5757     }
5758
         /* Reached from all three branches above, including the ERR() one. */
5759     surface->flags |= location;
5760     surface->ds_current_size.cx = surface->resource.width;
5761     surface->ds_current_size.cy = surface->resource.height;
5762 }
5763
/* Update which locations of the surface are flagged as up to date.
 *
 * With "persistent" set, "location" becomes the complete set of up to date
 * locations: all other SFLAG_LOCATIONS bits are cleared. Without it, the bits
 * in "location" are removed from the surface flags instead. In both cases the
 * containing texture, if any, may be marked dirty (see the branch comments),
 * and an ERR() is emitted if the surface ends up with no location at all. */
5764 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5765 {
5766     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5767     struct wined3d_surface *overlay;
5768
5769     TRACE("surface %p, location %s, persistent %#x.\n",
5770             surface, debug_surflocation(location), persistent);
5771
         /* Diagnostic only; processing continues after the ERR(). */
5772     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5773             && (location & SFLAG_INDRAWABLE))
5774         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5775
         /* With EXT_texture_sRGB_decode the RGB and sRGB texture locations
          * are always handled together: if either is named, both are. */
5776     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5777             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5778         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5779
5780     if (persistent)
5781     {
             /* A texture location that is currently flagged but is not part
              * of the new set is about to be dropped: tell the container. */
5782         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5783                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5784         {
5785             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5786             {
5787                 TRACE("Passing to container.\n");
5788                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5789             }
5790         }
5791         surface->flags &= ~SFLAG_LOCATIONS;
5792         surface->flags |= location;
5793
5794         /* Redraw emulated overlays, if any */
5795         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5796         {
5797             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5798             {
5799                 overlay->surface_ops->surface_draw_overlay(overlay);
5800             }
5801         }
5802     }
5803     else
5804     {
             /* The surface has a texture location flagged and a texture
              * location is among those being removed: tell the container. */
5805         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5806         {
5807             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5808             {
5809                 TRACE("Passing to container\n");
5810                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5811             }
5812         }
5813         surface->flags &= ~location;
5814     }
5815
5816     if (!(surface->flags & SFLAG_LOCATIONS))
5817     {
5818         ERR("Surface %p does not have any up to date location.\n", surface);
5819     }
5820 }
5821
/* Map a single surface location flag to the resource access flag needed to
 * use that location.
 *
 * Returns WINED3D_RESOURCE_ACCESS_CPU for SFLAG_INSYSMEM and
 * WINED3D_RESOURCE_ACCESS_GPU for the drawable, texture and renderbuffer
 * locations. "location" is compared as a whole value, so anything else,
 * including a combination of several flags, ends up in the default case,
 * which logs a FIXME and returns 0. */
5822 static DWORD resource_access_from_location(DWORD location)
5823 {
5824     switch (location)
5825     {
5826         case SFLAG_INSYSMEM:
5827             return WINED3D_RESOURCE_ACCESS_CPU;
5828
5829         case SFLAG_INDRAWABLE:
5830         case SFLAG_INSRGBTEX:
5831         case SFLAG_INTEXTURE:
5832         case SFLAG_INRB_MULTISAMPLE:
5833         case SFLAG_INRB_RESOLVED:
5834             return WINED3D_RESOURCE_ACCESS_GPU;
5835
5836         default:
5837             FIXME("Unhandled location %#x.\n", location);
5838             return 0;
5839     }
5840 }
5841
/* Load the surface contents into system memory.
 *
 * If a GL texture location (RGB or sRGB) is flagged, the data is downloaded
 * from the texture; otherwise it is read back from the framebuffer into
 * resource.allocatedMemory. "rect" is only passed on to the framebuffer
 * read; the texture download path does not use it. */
5842 static void surface_load_sysmem(struct wined3d_surface *surface,
5843         const struct wined3d_gl_info *gl_info, const RECT *rect)
5844 {
5845     surface_prepare_system_memory(surface);
5846
5847     /* Download the surface to system memory. */
5848     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5849     {
5850         struct wined3d_device *device = surface->resource.device;
5851         struct wined3d_context *context;
5852
5853         /* TODO: Use already acquired context when possible. */
5854         context = context_acquire(device, NULL);
5855
             /* The last argument is TRUE only when SFLAG_INTEXTURE is not
              * set, i.e. when only SFLAG_INSRGBTEX is available.
              * NOTE(review): presumably it selects the sRGB texture -
              * confirm against surface_bind_and_dirtify(). */
5856         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5857         surface_download_data(surface, gl_info);
5858
5859         context_release(context);
5860
5861         return;
5862     }
5863
5864     /* Note: It might be faster to download into a texture first. */
5865     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5866             wined3d_surface_get_pitch(surface));
5867 }
5868
/* Load the surface contents into the drawable (SFLAG_INDRAWABLE).
 *
 * If the RGB texture is flagged as up to date it is blitted to the drawable;
 * otherwise the system memory copy is written with
 * flush_to_framebuffer_drawpixels(), going through a temporary converted
 * buffer when d3dfmt_get_conv() reports that a conversion is needed.
 *
 * Returns WINED3D_OK on success, WINED3DERR_INVALIDCALL for an offscreen
 * surface in ORM_FBO mode, and E_OUTOFMEMORY if the conversion buffer cannot
 * be allocated. */
5869 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5870         const struct wined3d_gl_info *gl_info, const RECT *rect)
5871 {
5872     struct wined3d_device *device = surface->resource.device;
5873     struct wined3d_format format;
5874     CONVERT_TYPES convert;
5875     UINT byte_count;
5876     BYTE *mem;
5877
5878     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5879     {
5880         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5881         return WINED3DERR_INVALIDCALL;
5882     }
5883
         /* In RTL_READTEX mode the texture is loaded first, so that the
          * texture blit below gets used. */
5884     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5885         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5886
5887     if (surface->flags & SFLAG_INTEXTURE)
5888     {
5889         RECT r;
5890
             /* Same rectangle for source and destination, point filtering,
              * no color keying. */
5891         surface_get_rect(surface, rect, &r);
5892         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5893
5894         return WINED3D_OK;
5895     }
5896
5897     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5898     {
5899         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5900          * path through sysmem. */
5901         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5902     }
5903
5904     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5905
5906     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5907      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5908      * called. */
5909     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5910     {
5911         struct wined3d_context *context;
5912
5913         TRACE("Removing the pbo attached to surface %p.\n", surface);
5914
5915         /* TODO: Use already acquired context when possible. */
5916         context = context_acquire(device, NULL);
5917
5918         surface_remove_pbo(surface, gl_info);
5919
5920         context_release(context);
5921     }
5922
5923     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5924     {
5925         UINT height = surface->resource.height;
5926         UINT width = surface->resource.width;
5927         UINT src_pitch, dst_pitch;
5928
5929         byte_count = format.conv_byte_count;
5930         src_pitch = wined3d_surface_get_pitch(surface);
5931
5932         /* Stick to the alignment for the converted surface too, makes it
5933          * easier to load the surface. */
5934         dst_pitch = width * byte_count;
             /* Round up to a multiple of surface_alignment; the mask form
              * assumes the alignment is a power of two. */
5935         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5936
5937         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5938         {
5939             ERR("Out of memory (%u).\n", dst_pitch * height);
5940             return E_OUTOFMEMORY;
5941         }
5942
5943         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5944                 src_pitch, width, height, dst_pitch, convert, surface);
5945
5946         surface->flags |= SFLAG_CONVERTED;
5947     }
5948     else
5949     {
             /* No conversion (or no system memory to convert): write the
              * surface memory as it is. */
5950         surface->flags &= ~SFLAG_CONVERTED;
5951         mem = surface->resource.allocatedMemory;
5952         byte_count = format.byte_count;
5953     }
5954
5955     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5956
         /* mem differs from allocatedMemory only when the temporary
          * conversion buffer was allocated above. */
5957     /* Don't delete PBO memory. */
5958     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5959         HeapFree(GetProcessHeap(), 0, mem);
5960
5961     return WINED3D_OK;
5962 }
5963
/* Load the surface contents into its GL texture; the sRGB texture is the
 * target when "srgb" is TRUE, the RGB texture otherwise.
 *
 * Three paths are tried in order:
 *  1. Without FBOs, an offscreen surface whose drawable is flagged is handled
 *     by read_from_framebuffer_texture().
 *  2. If a texture location is flagged, the format has both FBO attachable
 *     flags and fbo_blit_supported() agrees, the data is blitted between the
 *     RGB and sRGB textures.
 *  3. Otherwise the data is uploaded from system memory, loading
 *     SFLAG_INSYSMEM first when needed and converting into a temporary buffer
 *     when the format or color keying requires it.
 *
 * Returns WINED3D_OK, or E_OUTOFMEMORY if a conversion buffer cannot be
 * allocated. */
5964 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5965         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5966 {
5967     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
5968     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5969     struct wined3d_device *device = surface->resource.device;
5970     struct wined3d_context *context;
5971     UINT width, src_pitch, dst_pitch;
5972     struct wined3d_bo_address data;
5973     struct wined3d_format format;
5974     POINT dst_point = {0, 0};
5975     CONVERT_TYPES convert;
5976     BYTE *mem;
5977
5978     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5979             && surface_is_offscreen(surface)
5980             && (surface->flags & SFLAG_INDRAWABLE))
5981     {
5982         read_from_framebuffer_texture(surface, srgb);
5983
5984         return WINED3D_OK;
5985     }
5986
5987     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5988             && (surface->resource.format->flags & attach_flags) == attach_flags
5989             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5990                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5991                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5992     {
             /* Blit the whole surface from the other texture location into
              * the requested one. */
5993         if (srgb)
5994             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5995                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5996         else
5997             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5998                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5999
6000         return WINED3D_OK;
6001     }
6002
6003     /* Upload from system memory */
6004
6005     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6006             TRUE /* We will use textures */, &format, &convert);
6007
         /* If the data only lives in the other texture location and not in
          * system memory, download it to system memory first. */
6008     if (srgb)
6009     {
6010         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6011         {
6012             /* Performance warning... */
6013             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6014             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6015         }
6016     }
6017     else
6018     {
6019         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6020         {
6021             /* Performance warning... */
6022             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6023             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6024         }
6025     }
6026
6027     if (!(surface->flags & SFLAG_INSYSMEM))
6028     {
6029         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6030         /* Lets hope we get it from somewhere... */
6031         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6032     }
6033
6034     /* TODO: Use already acquired context when possible. */
6035     context = context_acquire(device, NULL);
6036
6037     surface_prepare_texture(surface, context, srgb);
6038     surface_bind_and_dirtify(surface, context, srgb);
6039
         /* Record in SFLAG_GLCKEY / glCKey whether a source blit color key
          * is set at the time of this upload. */
6040     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6041     {
6042         surface->flags |= SFLAG_GLCKEY;
6043         surface->glCKey = surface->SrcBltCKey;
6044     }
6045     else surface->flags &= ~SFLAG_GLCKEY;
6046
6047     width = surface->resource.width;
6048     src_pitch = wined3d_surface_get_pitch(surface);
6049
6050     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6051      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6052      * called. */
6053     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6054     {
6055         TRACE("Removing the pbo attached to surface %p.\n", surface);
6056         surface_remove_pbo(surface, gl_info);
6057     }
6058
6059     if (format.convert)
6060     {
6061         /* This code is entered for texture formats which need a fixup. */
6062         UINT height = surface->resource.height;
6063
6064         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6065         dst_pitch = width * format.conv_byte_count;
6066         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6067
6068         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6069         {
6070             ERR("Out of memory (%u).\n", dst_pitch * height);
6071             context_release(context);
6072             return E_OUTOFMEMORY;
6073         }
             /* NOTE(review): unlike the color keying branch below, this
              * branch does not check resource.allocatedMemory for NULL
              * before converting - confirm it cannot be NULL here. */
6074         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6075     }
6076     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6077     {
6078         /* This code is only entered for color keying fixups */
6079         UINT height = surface->resource.height;
6080
6081         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6082         dst_pitch = width * format.conv_byte_count;
6083         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6084
6085         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6086         {
6087             ERR("Out of memory (%u).\n", dst_pitch * height);
6088             context_release(context);
6089             return E_OUTOFMEMORY;
6090         }
6091         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6092                 width, height, dst_pitch, convert, surface);
6093     }
6094     else
6095     {
             /* No conversion: upload straight from the surface memory. */
6096         mem = surface->resource.allocatedMemory;
6097     }
6098
         /* Upload the full surface rectangle to the texture origin. */
6099     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6100     data.addr = mem;
6101     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6102
6103     context_release(context);
6104
         /* mem differs from allocatedMemory only when a temporary conversion
          * buffer was allocated above. */
6105     /* Don't delete PBO memory. */
6106     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6107         HeapFree(GetProcessHeap(), 0, mem);
6108
6109     return WINED3D_OK;
6110 }
6111
/* Blit the whole surface from its SFLAG_INRB_MULTISAMPLE location to its
 * SFLAG_INRB_RESOLVED location with surface_blt_fbo(), using point filtering.
 *
 * If the multisample location is not flagged as current only an ERR() is
 * logged; the blit is performed anyway. */
6112 static void surface_multisample_resolve(struct wined3d_surface *surface)
6113 {
6114     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6115
6116     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6117         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6118
6119     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6120             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6121 }
6122
/* Make sure the surface contents are available in "location", copying them
 * from another up to date location if necessary.
 *
 * "rect" is forwarded to the individual loaders. The location is only flagged
 * as up to date when "rect" is NULL.
 *
 * Returns WINED3D_OK on success (also when the location was already up to
 * date), WINED3DERR_INVALIDCALL for unsupported depth/stencil locations,
 * WINED3DERR_DEVICELOST if the surface has no up to date location at all, or
 * the failure code of the drawable / texture loader. */
6123 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6124 {
6125     struct wined3d_device *device = surface->resource.device;
6126     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6127     HRESULT hr;
6128
6129     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6130
         /* Depth/stencil surfaces take a separate path: only SFLAG_INTEXTURE
          * is handled, by loading the SFLAG_DS_OFFSCREEN location. */
6131     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6132     {
6133         if (location == SFLAG_INTEXTURE)
6134         {
6135             struct wined3d_context *context = context_acquire(device, NULL);
6136             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6137             context_release(context);
6138             return WINED3D_OK;
6139         }
6140         else
6141         {
6142             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6143             return WINED3DERR_INVALIDCALL;
6144         }
6145     }
6146
         /* With EXT_texture_sRGB_decode a request for the sRGB texture is
          * served by loading the RGB texture. */
6147     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6148         location = SFLAG_INTEXTURE;
6149
6150     if (surface->flags & location)
6151     {
6152         TRACE("Location already up to date.\n");
6153         return WINED3D_OK;
6154     }
6155
         /* Diagnostic only: the access check never prevents the load. */
6156     if (WARN_ON(d3d_surface))
6157     {
6158         DWORD required_access = resource_access_from_location(location);
6159         if ((surface->resource.access_flags & required_access) != required_access)
6160             WARN("Operation requires %#x access, but surface only has %#x.\n",
6161                     required_access, surface->resource.access_flags);
6162     }
6163
         /* Nothing to load from: flag the surface as lost. */
6164     if (!(surface->flags & SFLAG_LOCATIONS))
6165     {
6166         ERR("Surface %p does not have any up to date location.\n", surface);
6167         surface->flags |= SFLAG_LOST;
6168         return WINED3DERR_DEVICELOST;
6169     }
6170
6171     switch (location)
6172     {
6173         case SFLAG_INSYSMEM:
6174             surface_load_sysmem(surface, gl_info, rect);
6175             break;
6176
6177         case SFLAG_INDRAWABLE:
6178             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6179                 return hr;
6180             break;
6181
6182         case SFLAG_INRB_RESOLVED:
6183             surface_multisample_resolve(surface);
6184             break;
6185
6186         case SFLAG_INTEXTURE:
6187         case SFLAG_INSRGBTEX:
6188             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6189                 return hr;
6190             break;
6191
6192         default:
                 /* Note: execution continues below, so an unhandled location
                  * is still flagged as up to date when rect is NULL. */
6193             ERR("Don't know how to handle location %#x.\n", location);
6194             break;
6195     }
6196
         /* Only a load with rect == NULL marks the location as up to date.
          * In that case surface_evict_sysmem() is also called if a
          * non-sysmem location was loaded while SFLAG_INSYSMEM is set. */
6197     if (!rect)
6198     {
6199         surface->flags |= location;
6200
6201         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6202             surface_evict_sysmem(surface);
6203     }
6204
         /* With EXT_texture_sRGB_decode, one valid texture location implies
          * the other. */
6205     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6206             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6207     {
6208         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6209     }
6210
6211     return WINED3D_OK;
6212 }
6213
/* Tell whether the surface is offscreen.
 *
 * Returns TRUE for any surface whose container is not a swapchain, FALSE for
 * a swapchain's front buffer, and the swapchain's render_to_fbo setting for
 * every other swapchain surface. */
6214 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6215 {
         /* Read from the container union up front; the pointer is only
          * dereferenced after the container type check below. */
6216     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6217
6218     /* Not on a swapchain - must be offscreen */
6219     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6220
6221     /* The front buffer is always onscreen */
6222     if (surface == swapchain->front_buffer) return FALSE;
6223
6224     /* If the swapchain is rendered to an FBO, the backbuffer is
6225      * offscreen, otherwise onscreen */
6226     return swapchain->render_to_fbo;
6227 }
6228
/* Allocation hook of the fixed-function blitter: does nothing with "device"
 * and always returns WINED3D_OK. */
6229 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Free hook of the fixed-function blitter: intentionally empty, matching
 * ffp_blit_alloc() which allocates nothing. */
6230 /* Context activation is done by the caller. */
6231 static void ffp_blit_free(struct wined3d_device *device) { }
6232
/* Fill a 256 entry, 4 bytes per entry palette table with
 * d3dfmt_p8_init_palette() and upload it with glColorTableEXT() for the
 * surface's texture target.
 *
 * The color key argument passed to d3dfmt_p8_init_palette() is TRUE when the
 * surface has WINEDDSD_CKSRCBLT set in CKeyFlags. GL is entered and left
 * around the upload inside this function. */
6233 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6234 /* Context activation is done by the caller. */
6235 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6236 {
6237     BYTE table[256][4];
6238     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6239
6240     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6241
6242     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6243     ENTER_GL();
6244     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6245     LEAVE_GL();
6246 }
6247
/* Set up fixed-function blitting from "surface".
 *
 * Uploads the palette first when the surface format has the P8 complex fixup
 * and EXT_PALETTED_TEXTURE is supported, then enables the surface's texture
 * target. "blit_priv" is not used. Always returns WINED3D_OK. */
6248 /* Context activation is done by the caller. */
6249 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6250 {
6251     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6252
6253     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6254      * else the surface is converted in software at upload time in LoadLocation.
6255      */
6256     if(fixup == COMPLEX_FIXUP_P8 && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6257         ffp_blit_p8_upload_palette(surface, context->gl_info);
6258
6259     ENTER_GL();
6260     glEnable(surface->texture_target);
6261     checkGLcall("glEnable(surface->texture_target)");
6262     LEAVE_GL();
6263     return WINED3D_OK;
6264 }
6265
/* Undo ffp_blit_set(): disable GL_TEXTURE_2D and, when the corresponding
 * extension is supported, the cube map and rectangle texture targets.
 *
 * The surface is not passed in, so every target that ffp_blit_set() might
 * have enabled is disabled rather than just the one that was used. */
6266 /* Context activation is done by the caller. */
6267 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6268 {
6269     ENTER_GL();
6270     glDisable(GL_TEXTURE_2D);
6271     checkGLcall("glDisable(GL_TEXTURE_2D)");
6272     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6273     {
6274         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6275         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6276     }
6277     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6278     {
6279         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6280         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6281     }
6282     LEAVE_GL();
6283 }
6284
6285 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6286         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6287         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6288 {
6289     enum complex_fixup src_fixup;
6290
6291     switch (blit_op)
6292     {
6293         case WINED3D_BLIT_OP_COLOR_BLIT:
6294             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6295                 return FALSE;
6296
6297             src_fixup = get_complex_fixup(src_format->color_fixup);
6298             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6299             {
6300                 TRACE("Checking support for fixup:\n");
6301                 dump_color_fixup_desc(src_format->color_fixup);
6302             }
6303
6304             if (!is_identity_fixup(dst_format->color_fixup))
6305             {
6306                 TRACE("Destination fixups are not supported\n");
6307                 return FALSE;
6308             }
6309
6310             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6311             {
6312                 TRACE("P8 fixup supported\n");
6313                 return TRUE;
6314             }
6315
6316             /* We only support identity conversions. */
6317             if (is_identity_fixup(src_format->color_fixup))
6318             {
6319                 TRACE("[OK]\n");
6320                 return TRUE;
6321             }
6322
6323             TRACE("[FAILED]\n");
6324             return FALSE;
6325
6326         case WINED3D_BLIT_OP_COLOR_FILL:
6327             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6328                 return FALSE;
6329
6330             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6331             {
6332                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6333                     return FALSE;
6334             }
6335             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6336             {
6337                 TRACE("Color fill not supported\n");
6338                 return FALSE;
6339             }
6340
6341             /* FIXME: We should reject color fills on formats with fixups,
6342              * but this would break P8 color fills for example. */
6343
6344             return TRUE;
6345
6346         case WINED3D_BLIT_OP_DEPTH_FILL:
6347             return TRUE;
6348
6349         default:
6350             TRACE("Unsupported blit_op=%d\n", blit_op);
6351             return FALSE;
6352     }
6353 }
6354
6355 /* Do not call while under the GL lock. */
6356 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6357         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6358 {
6359     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6360     struct wined3d_fb_state fb = {&dst_surface, NULL};
6361
6362     return device_clear_render_targets(device, 1, &fb,
6363             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6364 }
6365
6366 /* Do not call while under the GL lock. */
6367 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6368         struct wined3d_surface *surface, const RECT *rect, float depth)
6369 {
6370     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6371     struct wined3d_fb_state fb = {NULL, surface};
6372
6373     return device_clear_render_targets(device, 0, &fb,
6374             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6375 }
6376
6377 const struct blit_shader ffp_blit =  {
6378     ffp_blit_alloc,
6379     ffp_blit_free,
6380     ffp_blit_set,
6381     ffp_blit_unset,
6382     ffp_blit_supported,
6383     ffp_blit_color_fill,
6384     ffp_blit_depth_fill,
6385 };
6386
6387 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6388 {
6389     return WINED3D_OK;
6390 }
6391
/* The CPU blitter keeps no private data, so there is nothing to free.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Intentionally empty. */
}
6396
6397 /* Context activation is done by the caller. */
6398 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6399 {
6400     return WINED3D_OK;
6401 }
6402
/* Counterpart of cpu_blit_set(); no GL state was changed, so there is
 * nothing to restore.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* Intentionally empty. */
}
6407
6408 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6409         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6410         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6411 {
6412     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6413     {
6414         return TRUE;
6415     }
6416
6417     return FALSE;
6418 }
6419
6420 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6421         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6422         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6423 {
6424     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6425     const struct wined3d_format *src_format, *dst_format;
6426     struct wined3d_surface *orig_src = src_surface;
6427     WINED3DLOCKED_RECT dlock, slock;
6428     HRESULT hr = WINED3D_OK;
6429     const BYTE *sbuf;
6430     RECT xdst,xsrc;
6431     BYTE *dbuf;
6432     int x, y;
6433
6434     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6435             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6436             flags, fx, debug_d3dtexturefiltertype(filter));
6437
6438     xsrc = *src_rect;
6439
6440     if (!src_surface)
6441     {
6442         RECT full_rect;
6443
6444         full_rect.left = 0;
6445         full_rect.top = 0;
6446         full_rect.right = dst_surface->resource.width;
6447         full_rect.bottom = dst_surface->resource.height;
6448         IntersectRect(&xdst, &full_rect, dst_rect);
6449     }
6450     else
6451     {
6452         BOOL clip_horiz, clip_vert;
6453
6454         xdst = *dst_rect;
6455         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6456         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6457
6458         if (clip_vert || clip_horiz)
6459         {
6460             /* Now check if this is a special case or not... */
6461             if ((flags & WINEDDBLT_DDFX)
6462                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6463                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6464             {
6465                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6466                 return WINED3D_OK;
6467             }
6468
6469             if (clip_horiz)
6470             {
6471                 if (xdst.left < 0)
6472                 {
6473                     xsrc.left -= xdst.left;
6474                     xdst.left = 0;
6475                 }
6476                 if (xdst.right > dst_surface->resource.width)
6477                 {
6478                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6479                     xdst.right = (int)dst_surface->resource.width;
6480                 }
6481             }
6482
6483             if (clip_vert)
6484             {
6485                 if (xdst.top < 0)
6486                 {
6487                     xsrc.top -= xdst.top;
6488                     xdst.top = 0;
6489                 }
6490                 if (xdst.bottom > dst_surface->resource.height)
6491                 {
6492                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6493                     xdst.bottom = (int)dst_surface->resource.height;
6494                 }
6495             }
6496
6497             /* And check if after clipping something is still to be done... */
6498             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6499                     || (xdst.left >= (int)dst_surface->resource.width)
6500                     || (xdst.top >= (int)dst_surface->resource.height)
6501                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6502                     || (xsrc.left >= (int)src_surface->resource.width)
6503                     || (xsrc.top >= (int)src_surface->resource.height))
6504             {
6505                 TRACE("Nothing to be done after clipping.\n");
6506                 return WINED3D_OK;
6507             }
6508         }
6509     }
6510
6511     if (src_surface == dst_surface)
6512     {
6513         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6514         slock = dlock;
6515         src_format = dst_surface->resource.format;
6516         dst_format = src_format;
6517     }
6518     else
6519     {
6520         dst_format = dst_surface->resource.format;
6521         if (src_surface)
6522         {
6523             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6524             {
6525                 src_surface = surface_convert_format(src_surface, dst_format->id);
6526                 if (!src_surface)
6527                 {
6528                     /* The conv function writes a FIXME */
6529                     WARN("Cannot convert source surface format to dest format.\n");
6530                     goto release;
6531                 }
6532             }
6533             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6534             src_format = src_surface->resource.format;
6535         }
6536         else
6537         {
6538             src_format = dst_format;
6539         }
6540         if (dst_rect)
6541             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6542         else
6543             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6544     }
6545
6546     bpp = dst_surface->resource.format->byte_count;
6547     srcheight = xsrc.bottom - xsrc.top;
6548     srcwidth = xsrc.right - xsrc.left;
6549     dstheight = xdst.bottom - xdst.top;
6550     dstwidth = xdst.right - xdst.left;
6551     width = (xdst.right - xdst.left) * bpp;
6552
6553     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6554     {
6555         UINT row_block_count;
6556
6557         if (flags || src_surface == dst_surface)
6558         {
6559             FIXME("Only plain blits supported on compressed surfaces.\n");
6560             hr = E_NOTIMPL;
6561             goto release;
6562         }
6563
6564         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6565
6566         if (srcheight != dstheight || srcwidth != dstwidth)
6567         {
6568             WARN("Stretching not supported on compressed surfaces.\n");
6569             hr = WINED3DERR_INVALIDCALL;
6570             goto release;
6571         }
6572
6573         dbuf = dlock.pBits;
6574         sbuf = slock.pBits;
6575
6576         row_block_count = (dstwidth + dst_format->block_width - 1) / dst_format->block_width;
6577         for (y = 0; y < dstheight; y += dst_format->block_height)
6578         {
6579             memcpy(dbuf, sbuf, row_block_count * dst_format->block_byte_count);
6580             dbuf += dlock.Pitch;
6581             sbuf += slock.Pitch;
6582         }
6583
6584         goto release;
6585     }
6586
6587     if (dst_rect && src_surface != dst_surface)
6588         dbuf = dlock.pBits;
6589     else
6590         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6591
6592     /* First, all the 'source-less' blits */
6593     if (flags & WINEDDBLT_COLORFILL)
6594     {
6595         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6596         flags &= ~WINEDDBLT_COLORFILL;
6597     }
6598
6599     if (flags & WINEDDBLT_DEPTHFILL)
6600     {
6601         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6602     }
6603     if (flags & WINEDDBLT_ROP)
6604     {
6605         /* Catch some degenerate cases here. */
6606         switch (fx->dwROP)
6607         {
6608             case BLACKNESS:
6609                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6610                 break;
6611             case 0xAA0029: /* No-op */
6612                 break;
6613             case WHITENESS:
6614                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6615                 break;
6616             case SRCCOPY: /* Well, we do that below? */
6617                 break;
6618             default:
6619                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6620                 goto error;
6621         }
6622         flags &= ~WINEDDBLT_ROP;
6623     }
6624     if (flags & WINEDDBLT_DDROPS)
6625     {
6626         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6627     }
6628     /* Now the 'with source' blits. */
6629     if (src_surface)
6630     {
6631         const BYTE *sbase;
6632         int sx, xinc, sy, yinc;
6633
6634         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6635             goto release;
6636
6637         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6638                 && (srcwidth != dstwidth || srcheight != dstheight))
6639         {
6640             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6641             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6642         }
6643
6644         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6645         xinc = (srcwidth << 16) / dstwidth;
6646         yinc = (srcheight << 16) / dstheight;
6647
6648         if (!flags)
6649         {
6650             /* No effects, we can cheat here. */
6651             if (dstwidth == srcwidth)
6652             {
6653                 if (dstheight == srcheight)
6654                 {
6655                     /* No stretching in either direction. This needs to be as
6656                      * fast as possible. */
6657                     sbuf = sbase;
6658
6659                     /* Check for overlapping surfaces. */
6660                     if (src_surface != dst_surface || xdst.top < xsrc.top
6661                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6662                     {
6663                         /* No overlap, or dst above src, so copy from top downwards. */
6664                         for (y = 0; y < dstheight; ++y)
6665                         {
6666                             memcpy(dbuf, sbuf, width);
6667                             sbuf += slock.Pitch;
6668                             dbuf += dlock.Pitch;
6669                         }
6670                     }
6671                     else if (xdst.top > xsrc.top)
6672                     {
6673                         /* Copy from bottom upwards. */
6674                         sbuf += (slock.Pitch*dstheight);
6675                         dbuf += (dlock.Pitch*dstheight);
6676                         for (y = 0; y < dstheight; ++y)
6677                         {
6678                             sbuf -= slock.Pitch;
6679                             dbuf -= dlock.Pitch;
6680                             memcpy(dbuf, sbuf, width);
6681                         }
6682                     }
6683                     else
6684                     {
6685                         /* Src and dst overlapping on the same line, use memmove. */
6686                         for (y = 0; y < dstheight; ++y)
6687                         {
6688                             memmove(dbuf, sbuf, width);
6689                             sbuf += slock.Pitch;
6690                             dbuf += dlock.Pitch;
6691                         }
6692                     }
6693                 }
6694                 else
6695                 {
6696                     /* Stretching in y direction only. */
6697                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6698                     {
6699                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6700                         memcpy(dbuf, sbuf, width);
6701                         dbuf += dlock.Pitch;
6702                     }
6703                 }
6704             }
6705             else
6706             {
6707                 /* Stretching in X direction. */
6708                 int last_sy = -1;
6709                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6710                 {
6711                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6712
6713                     if ((sy >> 16) == (last_sy >> 16))
6714                     {
6715                         /* This source row is the same as last source row -
6716                          * Copy the already stretched row. */
6717                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6718                     }
6719                     else
6720                     {
6721 #define STRETCH_ROW(type) \
6722 do { \
6723     const type *s = (const type *)sbuf; \
6724     type *d = (type *)dbuf; \
6725     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6726         d[x] = s[sx >> 16]; \
6727 } while(0)
6728
6729                         switch(bpp)
6730                         {
6731                             case 1:
6732                                 STRETCH_ROW(BYTE);
6733                                 break;
6734                             case 2:
6735                                 STRETCH_ROW(WORD);
6736                                 break;
6737                             case 4:
6738                                 STRETCH_ROW(DWORD);
6739                                 break;
6740                             case 3:
6741                             {
6742                                 const BYTE *s;
6743                                 BYTE *d = dbuf;
6744                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6745                                 {
6746                                     DWORD pixel;
6747
6748                                     s = sbuf + 3 * (sx >> 16);
6749                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6750                                     d[0] = (pixel      ) & 0xff;
6751                                     d[1] = (pixel >>  8) & 0xff;
6752                                     d[2] = (pixel >> 16) & 0xff;
6753                                     d += 3;
6754                                 }
6755                                 break;
6756                             }
6757                             default:
6758                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6759                                 hr = WINED3DERR_NOTAVAILABLE;
6760                                 goto error;
6761                         }
6762 #undef STRETCH_ROW
6763                     }
6764                     dbuf += dlock.Pitch;
6765                     last_sy = sy;
6766                 }
6767             }
6768         }
6769         else
6770         {
6771             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6772             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6773             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6774             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6775             {
6776                 /* The color keying flags are checked for correctness in ddraw */
6777                 if (flags & WINEDDBLT_KEYSRC)
6778                 {
6779                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6780                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6781                 }
6782                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6783                 {
6784                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6785                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6786                 }
6787
6788                 if (flags & WINEDDBLT_KEYDEST)
6789                 {
6790                     /* Destination color keys are taken from the source surface! */
6791                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6792                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6793                 }
6794                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6795                 {
6796                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6797                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6798                 }
6799
6800                 if (bpp == 1)
6801                 {
6802                     keymask = 0xff;
6803                 }
6804                 else
6805                 {
6806                     keymask = src_format->red_mask
6807                             | src_format->green_mask
6808                             | src_format->blue_mask;
6809                 }
6810                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6811             }
6812
6813             if (flags & WINEDDBLT_DDFX)
6814             {
6815                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6816                 LONG tmpxy;
6817                 dTopLeft     = dbuf;
6818                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6819                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6820                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6821
6822                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6823                 {
6824                     /* I don't think we need to do anything about this flag */
6825                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6826                 }
6827                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6828                 {
6829                     tmp          = dTopRight;
6830                     dTopRight    = dTopLeft;
6831                     dTopLeft     = tmp;
6832                     tmp          = dBottomRight;
6833                     dBottomRight = dBottomLeft;
6834                     dBottomLeft  = tmp;
6835                     dstxinc = dstxinc * -1;
6836                 }
6837                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6838                 {
6839                     tmp          = dTopLeft;
6840                     dTopLeft     = dBottomLeft;
6841                     dBottomLeft  = tmp;
6842                     tmp          = dTopRight;
6843                     dTopRight    = dBottomRight;
6844                     dBottomRight = tmp;
6845                     dstyinc = dstyinc * -1;
6846                 }
6847                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6848                 {
6849                     /* I don't think we need to do anything about this flag */
6850                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6851                 }
6852                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6853                 {
6854                     tmp          = dBottomRight;
6855                     dBottomRight = dTopLeft;
6856                     dTopLeft     = tmp;
6857                     tmp          = dBottomLeft;
6858                     dBottomLeft  = dTopRight;
6859                     dTopRight    = tmp;
6860                     dstxinc = dstxinc * -1;
6861                     dstyinc = dstyinc * -1;
6862                 }
6863                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6864                 {
6865                     tmp          = dTopLeft;
6866                     dTopLeft     = dBottomLeft;
6867                     dBottomLeft  = dBottomRight;
6868                     dBottomRight = dTopRight;
6869                     dTopRight    = tmp;
6870                     tmpxy   = dstxinc;
6871                     dstxinc = dstyinc;
6872                     dstyinc = tmpxy;
6873                     dstxinc = dstxinc * -1;
6874                 }
6875                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6876                 {
6877                     tmp          = dTopLeft;
6878                     dTopLeft     = dTopRight;
6879                     dTopRight    = dBottomRight;
6880                     dBottomRight = dBottomLeft;
6881                     dBottomLeft  = tmp;
6882                     tmpxy   = dstxinc;
6883                     dstxinc = dstyinc;
6884                     dstyinc = tmpxy;
6885                     dstyinc = dstyinc * -1;
6886                 }
6887                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6888                 {
6889                     /* I don't think we need to do anything about this flag */
6890                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6891                 }
6892                 dbuf = dTopLeft;
6893                 flags &= ~(WINEDDBLT_DDFX);
6894             }
6895
6896 #define COPY_COLORKEY_FX(type) \
6897 do { \
6898     const type *s; \
6899     type *d = (type *)dbuf, *dx, tmp; \
6900     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6901     { \
6902         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6903         dx = d; \
6904         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6905         { \
6906             tmp = s[sx >> 16]; \
6907             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6908                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6909             { \
6910                 dx[0] = tmp; \
6911             } \
6912             dx = (type *)(((BYTE *)dx) + dstxinc); \
6913         } \
6914         d = (type *)(((BYTE *)d) + dstyinc); \
6915     } \
6916 } while(0)
6917
6918             switch (bpp)
6919             {
6920                 case 1:
6921                     COPY_COLORKEY_FX(BYTE);
6922                     break;
6923                 case 2:
6924                     COPY_COLORKEY_FX(WORD);
6925                     break;
6926                 case 4:
6927                     COPY_COLORKEY_FX(DWORD);
6928                     break;
6929                 case 3:
6930                 {
6931                     const BYTE *s;
6932                     BYTE *d = dbuf, *dx;
6933                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6934                     {
6935                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6936                         dx = d;
6937                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
6938                         {
6939                             DWORD pixel, dpixel = 0;
6940                             s = sbuf + 3 * (sx>>16);
6941                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6942                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
6943                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
6944                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
6945                             {
6946                                 dx[0] = (pixel      ) & 0xff;
6947                                 dx[1] = (pixel >>  8) & 0xff;
6948                                 dx[2] = (pixel >> 16) & 0xff;
6949                             }
6950                             dx += dstxinc;
6951                         }
6952                         d += dstyinc;
6953                     }
6954                     break;
6955                 }
6956                 default:
6957                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
6958                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
6959                     hr = WINED3DERR_NOTAVAILABLE;
6960                     goto error;
6961 #undef COPY_COLORKEY_FX
6962             }
6963         }
6964     }
6965
6966 error:
6967     if (flags && FIXME_ON(d3d_surface))
6968     {
6969         FIXME("\tUnsupported flags: %#x.\n", flags);
6970     }
6971
6972 release:
6973     wined3d_surface_unmap(dst_surface);
6974     if (src_surface && src_surface != dst_surface)
6975         wined3d_surface_unmap(src_surface);
6976     /* Release the converted surface, if any. */
6977     if (src_surface && src_surface != orig_src)
6978         wined3d_surface_decref(src_surface);
6979
6980     return hr;
6981 }
6982
6983 /* Do not call while under the GL lock. */
6984 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6985         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6986 {
6987     static const RECT src_rect;
6988     WINEDDBLTFX BltFx;
6989
6990     memset(&BltFx, 0, sizeof(BltFx));
6991     BltFx.dwSize = sizeof(BltFx);
6992     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
6993     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
6994             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
6995 }
6996
6997 /* Do not call while under the GL lock. */
6998 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
6999         struct wined3d_surface *surface, const RECT *rect, float depth)
7000 {
7001     FIXME("Depth filling not implemented by cpu_blit.\n");
7002     return WINED3DERR_INVALIDCALL;
7003 }
7004
7005 const struct blit_shader cpu_blit =  {
7006     cpu_blit_alloc,
7007     cpu_blit_free,
7008     cpu_blit_set,
7009     cpu_blit_unset,
7010     cpu_blit_supported,
7011     cpu_blit_color_fill,
7012     cpu_blit_depth_fill,
7013 };
7014
7015 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7016         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7017         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7018         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7019 {
7020     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7021     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7022     unsigned int resource_size;
7023     HRESULT hr;
7024
7025     if (multisample_quality > 0)
7026     {
7027         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7028         multisample_quality = 0;
7029     }
7030
7031     /* Quick lockable sanity check.
7032      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7033      * this function is too deep to need to care about things like this.
7034      * Levels need to be checked too, since they all affect what can be done. */
7035     switch (pool)
7036     {
7037         case WINED3DPOOL_SCRATCH:
7038             if (!lockable)
7039             {
7040                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7041                         "which are mutually exclusive, setting lockable to TRUE.\n");
7042                 lockable = TRUE;
7043             }
7044             break;
7045
7046         case WINED3DPOOL_SYSTEMMEM:
7047             if (!lockable)
7048                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7049             break;
7050
7051         case WINED3DPOOL_MANAGED:
7052             if (usage & WINED3DUSAGE_DYNAMIC)
7053                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7054             break;
7055
7056         case WINED3DPOOL_DEFAULT:
7057             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7058                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7059             break;
7060
7061         default:
7062             FIXME("Unknown pool %#x.\n", pool);
7063             break;
7064     };
7065
7066     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7067         FIXME("Trying to create a render target that isn't in the default pool.\n");
7068
7069     /* FIXME: Check that the format is supported by the device. */
7070
7071     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7072     if (!resource_size)
7073         return WINED3DERR_INVALIDCALL;
7074
7075     surface->surface_type = surface_type;
7076
7077     switch (surface_type)
7078     {
7079         case SURFACE_OPENGL:
7080             surface->surface_ops = &surface_ops;
7081             break;
7082
7083         case SURFACE_GDI:
7084             surface->surface_ops = &gdi_surface_ops;
7085             break;
7086
7087         default:
7088             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7089             return WINED3DERR_INVALIDCALL;
7090     }
7091
7092     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7093             multisample_type, multisample_quality, usage, pool, width, height, 1,
7094             resource_size, parent, parent_ops, &surface_resource_ops);
7095     if (FAILED(hr))
7096     {
7097         WARN("Failed to initialize resource, returning %#x.\n", hr);
7098         return hr;
7099     }
7100
7101     /* "Standalone" surface. */
7102     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7103
7104     surface->texture_level = level;
7105     list_init(&surface->overlays);
7106
7107     /* Flags */
7108     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7109     if (discard)
7110         surface->flags |= SFLAG_DISCARD;
7111     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7112         surface->flags |= SFLAG_LOCKABLE;
7113     /* I'm not sure if this qualifies as a hack or as an optimization. It
7114      * seems reasonable to assume that lockable render targets will get
7115      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7116      * creation. However, the other reason we want to do this is that several
7117      * ddraw applications access surface memory while the surface isn't
7118      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7119      * future locks prevents these from crashing. */
7120     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7121         surface->flags |= SFLAG_DYNLOCK;
7122
7123     /* Mark the texture as dirty so that it gets loaded first time around. */
7124     surface_add_dirty_rect(surface, NULL);
7125     list_init(&surface->renderbuffers);
7126
7127     TRACE("surface %p, memory %p, size %u\n",
7128             surface, surface->resource.allocatedMemory, surface->resource.size);
7129
7130     /* Call the private setup routine */
7131     hr = surface->surface_ops->surface_private_setup(surface);
7132     if (FAILED(hr))
7133     {
7134         ERR("Private setup failed, returning %#x\n", hr);
7135         surface->surface_ops->surface_cleanup(surface);
7136         return hr;
7137     }
7138
7139     return hr;
7140 }
7141
7142 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7143         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7144         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7145         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7146 {
7147     struct wined3d_surface *object;
7148     HRESULT hr;
7149
7150     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7151             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7152     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7153             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7154     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7155
7156     if (surface_type == SURFACE_OPENGL && !device->adapter)
7157     {
7158         ERR("OpenGL surfaces are not available without OpenGL.\n");
7159         return WINED3DERR_NOTAVAILABLE;
7160     }
7161
7162     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7163     if (!object)
7164     {
7165         ERR("Failed to allocate surface memory.\n");
7166         return WINED3DERR_OUTOFVIDEOMEMORY;
7167     }
7168
7169     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7170             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7171     if (FAILED(hr))
7172     {
7173         WARN("Failed to initialize surface, returning %#x.\n", hr);
7174         HeapFree(GetProcessHeap(), 0, object);
7175         return hr;
7176     }
7177
7178     TRACE("Created surface %p.\n", object);
7179     *surface = object;
7180
7181     return hr;
7182 }