jscript: Removed useless undef ACTSCPSITE_THIS.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     TRACE("surface %p.\n", surface);
46
47     if (surface->texture_name || (surface->flags & SFLAG_PBO)
48              || surface->rb_multisample || surface->rb_resolved
49              || !list_empty(&surface->renderbuffers))
50     {
51         struct wined3d_renderbuffer_entry *entry, *entry2;
52         const struct wined3d_gl_info *gl_info;
53         struct wined3d_context *context;
54
55         context = context_acquire(surface->resource.device, NULL);
56         gl_info = context->gl_info;
57
58         ENTER_GL();
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         if (surface->rb_multisample)
73         {
74             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
76         }
77
78         if (surface->rb_resolved)
79         {
80             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
81             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
82         }
83
84         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
85         {
86             TRACE("Deleting renderbuffer %u.\n", entry->id);
87             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
88             HeapFree(GetProcessHeap(), 0, entry);
89         }
90
91         LEAVE_GL();
92
93         context_release(context);
94     }
95
96     if (surface->flags & SFLAG_DIBSECTION)
97     {
98         /* Release the DC. */
99         SelectObject(surface->hDC, surface->dib.holdbitmap);
100         DeleteDC(surface->hDC);
101         /* Release the DIB section. */
102         DeleteObject(surface->dib.DIBsection);
103         surface->dib.bitmap_data = NULL;
104         surface->resource.allocatedMemory = NULL;
105     }
106
107     if (surface->flags & SFLAG_USERPTR)
108         wined3d_surface_set_mem(surface, NULL);
109     if (surface->overlay_dest)
110         list_remove(&surface->overlay_entry);
111
112     HeapFree(GetProcessHeap(), 0, surface->palette9);
113
114     resource_cleanup(&surface->resource);
115 }
116
117 void surface_update_draw_binding(struct wined3d_surface *surface)
118 {
119     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
120         surface->draw_binding = SFLAG_INDRAWABLE;
121     else if (surface->resource.multisample_type)
122         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
123     else
124         surface->draw_binding = SFLAG_INTEXTURE;
125 }
126
127 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
128 {
129     TRACE("surface %p, container %p.\n", surface, container);
130
131     if (!container && type != WINED3D_CONTAINER_NONE)
132         ERR("Setting NULL container of type %#x.\n", type);
133
134     if (type == WINED3D_CONTAINER_SWAPCHAIN)
135     {
136         surface->get_drawable_size = get_drawable_size_swapchain;
137     }
138     else
139     {
140         switch (wined3d_settings.offscreen_rendering_mode)
141         {
142             case ORM_FBO:
143                 surface->get_drawable_size = get_drawable_size_fbo;
144                 break;
145
146             case ORM_BACKBUFFER:
147                 surface->get_drawable_size = get_drawable_size_backbuffer;
148                 break;
149
150             default:
151                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
152                 return;
153         }
154     }
155
156     surface->container.type = type;
157     surface->container.u.base = container;
158     surface_update_draw_binding(surface);
159 }
160
161 struct blt_info
162 {
163     GLenum binding;
164     GLenum bind_target;
165     enum tex_types tex_type;
166     GLfloat coords[4][3];
167 };
168
/* Floating point rectangle: left, top, right and bottom edges, in that order. */
struct float_rect
{
    float l, t, r, b;
};
176
177 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
178 {
179     f->l = ((r->left * 2.0f) / w) - 1.0f;
180     f->t = ((r->top * 2.0f) / h) - 1.0f;
181     f->r = ((r->right * 2.0f) / w) - 1.0f;
182     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
183 }
184
185 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
186 {
187     GLfloat (*coords)[3] = info->coords;
188     struct float_rect f;
189
190     switch (target)
191     {
192         default:
193             FIXME("Unsupported texture target %#x\n", target);
194             /* Fall back to GL_TEXTURE_2D */
195         case GL_TEXTURE_2D:
196             info->binding = GL_TEXTURE_BINDING_2D;
197             info->bind_target = GL_TEXTURE_2D;
198             info->tex_type = tex_2d;
199             coords[0][0] = (float)rect->left / w;
200             coords[0][1] = (float)rect->top / h;
201             coords[0][2] = 0.0f;
202
203             coords[1][0] = (float)rect->right / w;
204             coords[1][1] = (float)rect->top / h;
205             coords[1][2] = 0.0f;
206
207             coords[2][0] = (float)rect->left / w;
208             coords[2][1] = (float)rect->bottom / h;
209             coords[2][2] = 0.0f;
210
211             coords[3][0] = (float)rect->right / w;
212             coords[3][1] = (float)rect->bottom / h;
213             coords[3][2] = 0.0f;
214             break;
215
216         case GL_TEXTURE_RECTANGLE_ARB:
217             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
218             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
219             info->tex_type = tex_rect;
220             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
221             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
222             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
223             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
224             break;
225
226         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
227             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
228             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
229             info->tex_type = tex_cube;
230             cube_coords_float(rect, w, h, &f);
231
232             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
233             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
234             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
235             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
236             break;
237
238         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
239             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
240             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
241             info->tex_type = tex_cube;
242             cube_coords_float(rect, w, h, &f);
243
244             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
245             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
246             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
247             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
248             break;
249
250         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
251             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
252             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
253             info->tex_type = tex_cube;
254             cube_coords_float(rect, w, h, &f);
255
256             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
257             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
258             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
259             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
260             break;
261
262         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
263             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
264             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
265             info->tex_type = tex_cube;
266             cube_coords_float(rect, w, h, &f);
267
268             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
269             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
270             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
271             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
272             break;
273
274         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
275             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
276             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
277             info->tex_type = tex_cube;
278             cube_coords_float(rect, w, h, &f);
279
280             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
281             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
282             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
283             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
284             break;
285
286         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
287             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
288             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
289             info->tex_type = tex_cube;
290             cube_coords_float(rect, w, h, &f);
291
292             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
293             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
294             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
295             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
296             break;
297     }
298 }
299
300 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
301 {
302     if (rect_in)
303         *rect_out = *rect_in;
304     else
305     {
306         rect_out->left = 0;
307         rect_out->top = 0;
308         rect_out->right = surface->resource.width;
309         rect_out->bottom = surface->resource.height;
310     }
311 }
312
313 /* GL locking and context activation is done by the caller */
314 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
315         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
316 {
317     struct blt_info info;
318
319     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
320
321     glEnable(info.bind_target);
322     checkGLcall("glEnable(bind_target)");
323
324     context_bind_texture(context, info.bind_target, src_surface->texture_name);
325
326     /* Filtering for StretchRect */
327     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
328             wined3d_gl_mag_filter(magLookup, Filter));
329     checkGLcall("glTexParameteri");
330     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
331             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
334     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
335     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
336         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
337     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
338     checkGLcall("glTexEnvi");
339
340     /* Draw a quad */
341     glBegin(GL_TRIANGLE_STRIP);
342     glTexCoord3fv(info.coords[0]);
343     glVertex2i(dst_rect->left, dst_rect->top);
344
345     glTexCoord3fv(info.coords[1]);
346     glVertex2i(dst_rect->right, dst_rect->top);
347
348     glTexCoord3fv(info.coords[2]);
349     glVertex2i(dst_rect->left, dst_rect->bottom);
350
351     glTexCoord3fv(info.coords[3]);
352     glVertex2i(dst_rect->right, dst_rect->bottom);
353     glEnd();
354
355     /* Unbind the texture */
356     context_bind_texture(context, info.bind_target, 0);
357
358     /* We changed the filtering settings on the texture. Inform the
359      * container about this to get the filters reset properly next draw. */
360     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
361     {
362         struct wined3d_texture *texture = src_surface->container.u.texture;
363         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
364         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
365         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
366         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
367     }
368 }
369
370 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
371 {
372     const struct wined3d_format *format = surface->resource.format;
373     SYSTEM_INFO sysInfo;
374     BITMAPINFO *b_info;
375     int extraline = 0;
376     DWORD *masks;
377     UINT usage;
378     HDC dc;
379
380     TRACE("surface %p.\n", surface);
381
382     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
383     {
384         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
385         return WINED3DERR_INVALIDCALL;
386     }
387
388     switch (format->byte_count)
389     {
390         case 2:
391         case 4:
392             /* Allocate extra space to store the RGB bit masks. */
393             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
394             break;
395
396         case 3:
397             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
398             break;
399
400         default:
401             /* Allocate extra space for a palette. */
402             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
403                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
404             break;
405     }
406
407     if (!b_info)
408         return E_OUTOFMEMORY;
409
410     /* Some applications access the surface in via DWORDs, and do not take
411      * the necessary care at the end of the surface. So we need at least
412      * 4 extra bytes at the end of the surface. Check against the page size,
413      * if the last page used for the surface has at least 4 spare bytes we're
414      * safe, otherwise add an extra line to the DIB section. */
415     GetSystemInfo(&sysInfo);
416     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
417     {
418         extraline = 1;
419         TRACE("Adding an extra line to the DIB section.\n");
420     }
421
422     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
423     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
424     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
425     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
426     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
427             * wined3d_surface_get_pitch(surface);
428     b_info->bmiHeader.biPlanes = 1;
429     b_info->bmiHeader.biBitCount = format->byte_count * 8;
430
431     b_info->bmiHeader.biXPelsPerMeter = 0;
432     b_info->bmiHeader.biYPelsPerMeter = 0;
433     b_info->bmiHeader.biClrUsed = 0;
434     b_info->bmiHeader.biClrImportant = 0;
435
436     /* Get the bit masks */
437     masks = (DWORD *)b_info->bmiColors;
438     switch (surface->resource.format->id)
439     {
440         case WINED3DFMT_B8G8R8_UNORM:
441             usage = DIB_RGB_COLORS;
442             b_info->bmiHeader.biCompression = BI_RGB;
443             break;
444
445         case WINED3DFMT_B5G5R5X1_UNORM:
446         case WINED3DFMT_B5G5R5A1_UNORM:
447         case WINED3DFMT_B4G4R4A4_UNORM:
448         case WINED3DFMT_B4G4R4X4_UNORM:
449         case WINED3DFMT_B2G3R3_UNORM:
450         case WINED3DFMT_B2G3R3A8_UNORM:
451         case WINED3DFMT_R10G10B10A2_UNORM:
452         case WINED3DFMT_R8G8B8A8_UNORM:
453         case WINED3DFMT_R8G8B8X8_UNORM:
454         case WINED3DFMT_B10G10R10A2_UNORM:
455         case WINED3DFMT_B5G6R5_UNORM:
456         case WINED3DFMT_R16G16B16A16_UNORM:
457             usage = 0;
458             b_info->bmiHeader.biCompression = BI_BITFIELDS;
459             masks[0] = format->red_mask;
460             masks[1] = format->green_mask;
461             masks[2] = format->blue_mask;
462             break;
463
464         default:
465             /* Don't know palette */
466             b_info->bmiHeader.biCompression = BI_RGB;
467             usage = 0;
468             break;
469     }
470
471     if (!(dc = GetDC(0)))
472     {
473         HeapFree(GetProcessHeap(), 0, b_info);
474         return HRESULT_FROM_WIN32(GetLastError());
475     }
476
477     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
478             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
479             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
480     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
481     ReleaseDC(0, dc);
482
483     if (!surface->dib.DIBsection)
484     {
485         ERR("Failed to create DIB section.\n");
486         HeapFree(GetProcessHeap(), 0, b_info);
487         return HRESULT_FROM_WIN32(GetLastError());
488     }
489
490     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
491     /* Copy the existing surface to the dib section. */
492     if (surface->resource.allocatedMemory)
493     {
494         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
495                 surface->resource.height * wined3d_surface_get_pitch(surface));
496     }
497     else
498     {
499         /* This is to make maps read the GL texture although memory is allocated. */
500         surface->flags &= ~SFLAG_INSYSMEM;
501     }
502     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
503
504     HeapFree(GetProcessHeap(), 0, b_info);
505
506     /* Now allocate a DC. */
507     surface->hDC = CreateCompatibleDC(0);
508     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
509     TRACE("Using wined3d palette %p.\n", surface->palette);
510     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
511
512     surface->flags |= SFLAG_DIBSECTION;
513
514     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
515     surface->resource.heapMemory = NULL;
516
517     return WINED3D_OK;
518 }
519
520 static void surface_prepare_system_memory(struct wined3d_surface *surface)
521 {
522     struct wined3d_device *device = surface->resource.device;
523     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
524
525     TRACE("surface %p.\n", surface);
526
527     /* Performance optimization: Count how often a surface is locked, if it is
528      * locked regularly do not throw away the system memory copy. This avoids
529      * the need to download the surface from OpenGL all the time. The surface
530      * is still downloaded if the OpenGL texture is changed. */
531     if (!(surface->flags & SFLAG_DYNLOCK))
532     {
533         if (++surface->lockCount > MAXLOCKCOUNT)
534         {
535             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
536             surface->flags |= SFLAG_DYNLOCK;
537         }
538     }
539
540     /* Create a PBO for dynamically locked surfaces but don't do it for
541      * converted or NPOT surfaces. Also don't create a PBO for systemmem
542      * surfaces. */
543     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
544             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
545             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
546     {
547         struct wined3d_context *context;
548         GLenum error;
549
550         context = context_acquire(device, NULL);
551         ENTER_GL();
552
553         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
554         error = glGetError();
555         if (!surface->pbo || error != GL_NO_ERROR)
556             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
557
558         TRACE("Binding PBO %u.\n", surface->pbo);
559
560         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
561         checkGLcall("glBindBufferARB");
562
563         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
564                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
565         checkGLcall("glBufferDataARB");
566
567         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
568         checkGLcall("glBindBufferARB");
569
570         /* We don't need the system memory anymore and we can't even use it for PBOs. */
571         if (!(surface->flags & SFLAG_CLIENT))
572         {
573             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
574             surface->resource.heapMemory = NULL;
575         }
576         surface->resource.allocatedMemory = NULL;
577         surface->flags |= SFLAG_PBO;
578         LEAVE_GL();
579         context_release(context);
580     }
581     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
582     {
583         /* Whatever surface we have, make sure that there is memory allocated
584          * for the downloaded copy, or a PBO to map. */
585         if (!surface->resource.heapMemory)
586             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
587
588         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
589                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
590
591         if (surface->flags & SFLAG_INSYSMEM)
592             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
593     }
594 }
595
596 static void surface_evict_sysmem(struct wined3d_surface *surface)
597 {
598     if (surface->flags & SFLAG_DONOTFREE)
599         return;
600
601     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
602     surface->resource.allocatedMemory = NULL;
603     surface->resource.heapMemory = NULL;
604     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
605 }
606
607 /* Context activation is done by the caller. */
608 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
609         struct wined3d_context *context, BOOL srgb)
610 {
611     struct wined3d_device *device = surface->resource.device;
612     DWORD active_sampler;
613
614     /* We don't need a specific texture unit, but after binding the texture
615      * the current unit is dirty. Read the unit back instead of switching to
616      * 0, this avoids messing around with the state manager's GL states. The
617      * current texture unit should always be a valid one.
618      *
619      * To be more specific, this is tricky because we can implicitly be
620      * called from sampler() in state.c. This means we can't touch anything
621      * other than whatever happens to be the currently active texture, or we
622      * would risk marking already applied sampler states dirty again. */
623     active_sampler = device->rev_tex_unit_map[context->active_texture];
624
625     if (active_sampler != WINED3D_UNMAPPED_STAGE)
626         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
627     surface_bind(surface, context, srgb);
628 }
629
630 static void surface_force_reload(struct wined3d_surface *surface)
631 {
632     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
633 }
634
635 static void surface_release_client_storage(struct wined3d_surface *surface)
636 {
637     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
638
639     ENTER_GL();
640     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
641     if (surface->texture_name)
642     {
643         surface_bind_and_dirtify(surface, context, FALSE);
644         glTexImage2D(surface->texture_target, surface->texture_level,
645                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
646     }
647     if (surface->texture_name_srgb)
648     {
649         surface_bind_and_dirtify(surface, context, TRUE);
650         glTexImage2D(surface->texture_target, surface->texture_level,
651                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
652     }
653     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
654     LEAVE_GL();
655
656     context_release(context);
657
658     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
659     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
660     surface_force_reload(surface);
661 }
662
663 static HRESULT surface_private_setup(struct wined3d_surface *surface)
664 {
665     /* TODO: Check against the maximum texture sizes supported by the video card. */
666     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
667     unsigned int pow2Width, pow2Height;
668
669     TRACE("surface %p.\n", surface);
670
671     surface->texture_name = 0;
672     surface->texture_target = GL_TEXTURE_2D;
673
674     /* Non-power2 support */
675     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
676     {
677         pow2Width = surface->resource.width;
678         pow2Height = surface->resource.height;
679     }
680     else
681     {
682         /* Find the nearest pow2 match */
683         pow2Width = pow2Height = 1;
684         while (pow2Width < surface->resource.width)
685             pow2Width <<= 1;
686         while (pow2Height < surface->resource.height)
687             pow2Height <<= 1;
688     }
689     surface->pow2Width = pow2Width;
690     surface->pow2Height = pow2Height;
691
692     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
693     {
694         /* TODO: Add support for non power two compressed textures. */
695         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
696         {
697             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
698                   surface, surface->resource.width, surface->resource.height);
699             return WINED3DERR_NOTAVAILABLE;
700         }
701     }
702
703     if (pow2Width != surface->resource.width
704             || pow2Height != surface->resource.height)
705     {
706         surface->flags |= SFLAG_NONPOW2;
707     }
708
709     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
710             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
711     {
712         /* One of three options:
713          * 1: Do the same as we do with NPOT and scale the texture, (any
714          *    texture ops would require the texture to be scaled which is
715          *    potentially slow)
716          * 2: Set the texture to the maximum size (bad idea).
717          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
718          * 4: Create the surface, but allow it to be used only for DirectDraw
719          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
720          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
721          *    the render target. */
722         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
723         {
724             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
725             return WINED3DERR_NOTAVAILABLE;
726         }
727
728         /* We should never use this surface in combination with OpenGL! */
729         TRACE("Creating an oversized surface: %ux%u.\n",
730                 surface->pow2Width, surface->pow2Height);
731     }
732     else
733     {
734         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
735          * and EXT_PALETTED_TEXTURE is used in combination with texture
736          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
737          * EXT_PALETTED_TEXTURE doesn't work in combination with
738          * ARB_TEXTURE_RECTANGLE. */
739         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
740                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
741                 && gl_info->supported[EXT_PALETTED_TEXTURE]
742                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
743         {
744             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
745             surface->pow2Width = surface->resource.width;
746             surface->pow2Height = surface->resource.height;
747             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
748         }
749     }
750
751     switch (wined3d_settings.offscreen_rendering_mode)
752     {
753         case ORM_FBO:
754             surface->get_drawable_size = get_drawable_size_fbo;
755             break;
756
757         case ORM_BACKBUFFER:
758             surface->get_drawable_size = get_drawable_size_backbuffer;
759             break;
760
761         default:
762             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
763             return WINED3DERR_INVALIDCALL;
764     }
765
766     surface->flags |= SFLAG_INSYSMEM;
767
768     return WINED3D_OK;
769 }
770
771 static void surface_realize_palette(struct wined3d_surface *surface)
772 {
773     struct wined3d_palette *palette = surface->palette;
774
775     TRACE("surface %p.\n", surface);
776
777     if (!palette) return;
778
779     if (surface->resource.format->id == WINED3DFMT_P8_UINT
780             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
781     {
782         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
783         {
784             /* Make sure the texture is up to date. This call doesn't do
785              * anything if the texture is already up to date. */
786             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
787
788             /* We want to force a palette refresh, so mark the drawable as not being up to date */
789             if (!surface_is_offscreen(surface))
790                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
791         }
792         else
793         {
794             if (!(surface->flags & SFLAG_INSYSMEM))
795             {
796                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
797                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
798             }
799             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
800         }
801     }
802
803     if (surface->flags & SFLAG_DIBSECTION)
804     {
805         RGBQUAD col[256];
806         unsigned int i;
807
808         TRACE("Updating the DC's palette.\n");
809
810         for (i = 0; i < 256; ++i)
811         {
812             col[i].rgbRed   = palette->palents[i].peRed;
813             col[i].rgbGreen = palette->palents[i].peGreen;
814             col[i].rgbBlue  = palette->palents[i].peBlue;
815             col[i].rgbReserved = 0;
816         }
817         SetDIBColorTable(surface->hDC, 0, 256, col);
818     }
819
820     /* Propagate the changes to the drawable when we have a palette. */
821     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
822         surface_load_location(surface, surface->draw_binding, NULL);
823 }
824
825 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
826 {
827     HRESULT hr;
828
829     /* If there's no destination surface there is nothing to do. */
830     if (!surface->overlay_dest)
831         return WINED3D_OK;
832
833     /* Blt calls ModifyLocation on the dest surface, which in turn calls
834      * DrawOverlay to update the overlay. Prevent an endless recursion. */
835     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
836         return WINED3D_OK;
837
838     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
839     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
840             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
841     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
842
843     return hr;
844 }
845
846 static void surface_preload(struct wined3d_surface *surface)
847 {
848     TRACE("surface %p.\n", surface);
849
850     surface_internal_preload(surface, SRGB_ANY);
851 }
852
853 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
854 {
855     struct wined3d_device *device = surface->resource.device;
856     const RECT *pass_rect = rect;
857
858     TRACE("surface %p, rect %s, flags %#x.\n",
859             surface, wine_dbgstr_rect(rect), flags);
860
861     if (flags & WINED3DLOCK_DISCARD)
862     {
863         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
864         surface_prepare_system_memory(surface);
865         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
866     }
867     else
868     {
869         /* surface_load_location() does not check if the rectangle specifies
870          * the full surface. Most callers don't need that, so do it here. */
871         if (rect && !rect->top && !rect->left
872                 && rect->right == surface->resource.width
873                 && rect->bottom == surface->resource.height)
874             pass_rect = NULL;
875
876         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
877                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
878                 || surface == device->fb.render_targets[0])))
879             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
880     }
881
882     if (surface->flags & SFLAG_PBO)
883     {
884         const struct wined3d_gl_info *gl_info;
885         struct wined3d_context *context;
886
887         context = context_acquire(device, NULL);
888         gl_info = context->gl_info;
889
890         ENTER_GL();
891         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
892         checkGLcall("glBindBufferARB");
893
894         /* This shouldn't happen but could occur if some other function
895          * didn't handle the PBO properly. */
896         if (surface->resource.allocatedMemory)
897             ERR("The surface already has PBO memory allocated.\n");
898
899         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
900         checkGLcall("glMapBufferARB");
901
902         /* Make sure the PBO isn't set anymore in order not to break non-PBO
903          * calls. */
904         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
905         checkGLcall("glBindBufferARB");
906
907         LEAVE_GL();
908         context_release(context);
909     }
910
911     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
912     {
913         if (!rect)
914             surface_add_dirty_rect(surface, NULL);
915         else
916         {
917             WINED3DBOX b;
918
919             b.Left = rect->left;
920             b.Top = rect->top;
921             b.Right = rect->right;
922             b.Bottom = rect->bottom;
923             b.Front = 0;
924             b.Back = 1;
925             surface_add_dirty_rect(surface, &b);
926         }
927     }
928 }
929
930 static void surface_unmap(struct wined3d_surface *surface)
931 {
932     struct wined3d_device *device = surface->resource.device;
933     BOOL fullsurface;
934
935     TRACE("surface %p.\n", surface);
936
937     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
938
939     if (surface->flags & SFLAG_PBO)
940     {
941         const struct wined3d_gl_info *gl_info;
942         struct wined3d_context *context;
943
944         TRACE("Freeing PBO memory.\n");
945
946         context = context_acquire(device, NULL);
947         gl_info = context->gl_info;
948
949         ENTER_GL();
950         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
951         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
952         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
953         checkGLcall("glUnmapBufferARB");
954         LEAVE_GL();
955         context_release(context);
956
957         surface->resource.allocatedMemory = NULL;
958     }
959
960     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
961
962     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
963     {
964         TRACE("Not dirtified, nothing to do.\n");
965         goto done;
966     }
967
968     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
969             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
970     {
971         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
972         {
973             static BOOL warned = FALSE;
974             if (!warned)
975             {
976                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
977                 warned = TRUE;
978             }
979             goto done;
980         }
981
982         if (!surface->dirtyRect.left && !surface->dirtyRect.top
983                 && surface->dirtyRect.right == surface->resource.width
984                 && surface->dirtyRect.bottom == surface->resource.height)
985         {
986             fullsurface = TRUE;
987         }
988         else
989         {
990             /* TODO: Proper partial rectangle tracking. */
991             fullsurface = FALSE;
992             surface->flags |= SFLAG_INSYSMEM;
993         }
994
995         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
996
997         /* Partial rectangle tracking is not commonly implemented, it is only
998          * done for render targets. INSYSMEM was set before to tell
999          * surface_load_location() where to read the rectangle from.
1000          * Indrawable is set because all modifications from the partial
1001          * sysmem copy are written back to the drawable, thus the surface is
1002          * merged again in the drawable. The sysmem copy is not fully up to
1003          * date because only a subrectangle was read in Map(). */
1004         if (!fullsurface)
1005         {
1006             surface_modify_location(surface, surface->draw_binding, TRUE);
1007             surface_evict_sysmem(surface);
1008         }
1009
1010         surface->dirtyRect.left = surface->resource.width;
1011         surface->dirtyRect.top = surface->resource.height;
1012         surface->dirtyRect.right = 0;
1013         surface->dirtyRect.bottom = 0;
1014     }
1015     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1016     {
1017         FIXME("Depth / stencil buffer locking is not implemented.\n");
1018     }
1019
1020 done:
1021     /* Overlays have to be redrawn manually after changes with the GL implementation */
1022     if (surface->overlay_dest)
1023         surface->surface_ops->surface_draw_overlay(surface);
1024 }
1025
1026 static HRESULT surface_getdc(struct wined3d_surface *surface)
1027 {
1028     WINED3DLOCKED_RECT lock;
1029     HRESULT hr;
1030
1031     TRACE("surface %p.\n", surface);
1032
1033     /* Create a DIB section if there isn't a dc yet. */
1034     if (!surface->hDC)
1035     {
1036         if (surface->flags & SFLAG_CLIENT)
1037         {
1038             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1039             surface_release_client_storage(surface);
1040         }
1041         hr = surface_create_dib_section(surface);
1042         if (FAILED(hr))
1043             return WINED3DERR_INVALIDCALL;
1044
1045         /* Use the DIB section from now on if we are not using a PBO. */
1046         if (!(surface->flags & SFLAG_PBO))
1047             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1048     }
1049
1050     /* Map the surface. */
1051     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1052     if (FAILED(hr))
1053         ERR("Map failed, hr %#x.\n", hr);
1054
1055     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1056      * activates the allocatedMemory. */
1057     if (surface->flags & SFLAG_PBO)
1058         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
1059
1060     return hr;
1061 }
1062
1063 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1064 {
1065     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1066         return FALSE;
1067     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1068         return FALSE;
1069     return TRUE;
1070 }
1071
1072 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1073         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1074 {
1075     const struct wined3d_gl_info *gl_info;
1076     struct wined3d_context *context;
1077     DWORD src_mask, dst_mask;
1078     GLbitfield gl_mask;
1079
1080     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1081             device, src_surface, wine_dbgstr_rect(src_rect),
1082             dst_surface, wine_dbgstr_rect(dst_rect));
1083
1084     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1085     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1086
1087     if (src_mask != dst_mask)
1088     {
1089         ERR("Incompatible formats %s and %s.\n",
1090                 debug_d3dformat(src_surface->resource.format->id),
1091                 debug_d3dformat(dst_surface->resource.format->id));
1092         return;
1093     }
1094
1095     if (!src_mask)
1096     {
1097         ERR("Not a depth / stencil format: %s.\n",
1098                 debug_d3dformat(src_surface->resource.format->id));
1099         return;
1100     }
1101
1102     gl_mask = 0;
1103     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1104         gl_mask |= GL_DEPTH_BUFFER_BIT;
1105     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1106         gl_mask |= GL_STENCIL_BUFFER_BIT;
1107
1108     /* Make sure the locations are up-to-date. Loading the destination
1109      * surface isn't required if the entire surface is overwritten. */
1110     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1111     if (!surface_is_full_rect(dst_surface, dst_rect))
1112         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1113
1114     context = context_acquire(device, NULL);
1115     if (!context->valid)
1116     {
1117         context_release(context);
1118         WARN("Invalid context, skipping blit.\n");
1119         return;
1120     }
1121
1122     gl_info = context->gl_info;
1123
1124     ENTER_GL();
1125
1126     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1127     glReadBuffer(GL_NONE);
1128     checkGLcall("glReadBuffer()");
1129     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1130
1131     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1132     context_set_draw_buffer(context, GL_NONE);
1133     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1134
1135     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1136     {
1137         glDepthMask(GL_TRUE);
1138         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1139     }
1140     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1141     {
1142         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1143         {
1144             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1145             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1146         }
1147         glStencilMask(~0U);
1148         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1149     }
1150
1151     glDisable(GL_SCISSOR_TEST);
1152     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1153
1154     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1155             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1156     checkGLcall("glBlitFramebuffer()");
1157
1158     LEAVE_GL();
1159
1160     if (wined3d_settings.strict_draw_ordering)
1161         wglFlush(); /* Flush to ensure ordering across contexts. */
1162
1163     context_release(context);
1164 }
1165
1166 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1167  * Depth / stencil is not supported. */
1168 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1169         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1170         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1171 {
1172     const struct wined3d_gl_info *gl_info;
1173     struct wined3d_context *context;
1174     RECT src_rect, dst_rect;
1175     GLenum gl_filter;
1176     GLenum buffer;
1177
1178     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1179     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1180             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1181     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1182             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1183
1184     src_rect = *src_rect_in;
1185     dst_rect = *dst_rect_in;
1186
1187     switch (filter)
1188     {
1189         case WINED3DTEXF_LINEAR:
1190             gl_filter = GL_LINEAR;
1191             break;
1192
1193         default:
1194             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1195         case WINED3DTEXF_NONE:
1196         case WINED3DTEXF_POINT:
1197             gl_filter = GL_NEAREST;
1198             break;
1199     }
1200
1201     /* Resolve the source surface first if needed. */
1202     if (src_location == SFLAG_INRB_MULTISAMPLE
1203             && (src_surface->resource.format->id != dst_surface->resource.format->id
1204                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1205                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1206         src_location = SFLAG_INRB_RESOLVED;
1207
1208     /* Make sure the locations are up-to-date. Loading the destination
1209      * surface isn't required if the entire surface is overwritten. (And is
1210      * in fact harmful if we're being called by surface_load_location() with
1211      * the purpose of loading the destination surface.) */
1212     surface_load_location(src_surface, src_location, NULL);
1213     if (!surface_is_full_rect(dst_surface, &dst_rect))
1214         surface_load_location(dst_surface, dst_location, NULL);
1215
1216     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1217     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1218     else context = context_acquire(device, NULL);
1219
1220     if (!context->valid)
1221     {
1222         context_release(context);
1223         WARN("Invalid context, skipping blit.\n");
1224         return;
1225     }
1226
1227     gl_info = context->gl_info;
1228
1229     if (src_location == SFLAG_INDRAWABLE)
1230     {
1231         TRACE("Source surface %p is onscreen.\n", src_surface);
1232         buffer = surface_get_gl_buffer(src_surface);
1233         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1234     }
1235     else
1236     {
1237         TRACE("Source surface %p is offscreen.\n", src_surface);
1238         buffer = GL_COLOR_ATTACHMENT0;
1239     }
1240
1241     ENTER_GL();
1242     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1243     glReadBuffer(buffer);
1244     checkGLcall("glReadBuffer()");
1245     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1246     LEAVE_GL();
1247
1248     if (dst_location == SFLAG_INDRAWABLE)
1249     {
1250         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1251         buffer = surface_get_gl_buffer(dst_surface);
1252         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1253     }
1254     else
1255     {
1256         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1257         buffer = GL_COLOR_ATTACHMENT0;
1258     }
1259
1260     ENTER_GL();
1261     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1262     context_set_draw_buffer(context, buffer);
1263     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1264     context_invalidate_state(context, STATE_FRAMEBUFFER);
1265
1266     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1267     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1268     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1269     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1270     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1271
1272     glDisable(GL_SCISSOR_TEST);
1273     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1274
1275     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1276             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1277     checkGLcall("glBlitFramebuffer()");
1278
1279     LEAVE_GL();
1280
1281     if (wined3d_settings.strict_draw_ordering
1282             || (dst_location == SFLAG_INDRAWABLE
1283             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1284         wglFlush();
1285
1286     context_release(context);
1287 }
1288
1289 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1290         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1291         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1292 {
1293     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1294         return FALSE;
1295
1296     /* Source and/or destination need to be on the GL side */
1297     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1298         return FALSE;
1299
1300     switch (blit_op)
1301     {
1302         case WINED3D_BLIT_OP_COLOR_BLIT:
1303             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1304                 return FALSE;
1305             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1306                 return FALSE;
1307             break;
1308
1309         case WINED3D_BLIT_OP_DEPTH_BLIT:
1310             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1311                 return FALSE;
1312             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1313                 return FALSE;
1314             break;
1315
1316         default:
1317             return FALSE;
1318     }
1319
1320     if (!(src_format->id == dst_format->id
1321             || (is_identity_fixup(src_format->color_fixup)
1322             && is_identity_fixup(dst_format->color_fixup))))
1323         return FALSE;
1324
1325     return TRUE;
1326 }
1327
1328 /* This function checks if the primary render target uses the 8bit paletted format. */
1329 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1330 {
1331     if (device->fb.render_targets && device->fb.render_targets[0])
1332     {
1333         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1334         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1335                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1336             return TRUE;
1337     }
1338     return FALSE;
1339 }
1340
1341 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1342         DWORD color, WINED3DCOLORVALUE *float_color)
1343 {
1344     const struct wined3d_format *format = surface->resource.format;
1345     const struct wined3d_device *device = surface->resource.device;
1346
1347     switch (format->id)
1348     {
1349         case WINED3DFMT_P8_UINT:
1350             if (surface->palette)
1351             {
1352                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1353                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1354                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1355             }
1356             else
1357             {
1358                 float_color->r = 0.0f;
1359                 float_color->g = 0.0f;
1360                 float_color->b = 0.0f;
1361             }
1362             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1363             break;
1364
1365         case WINED3DFMT_B5G6R5_UNORM:
1366             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1367             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1368             float_color->b = (color & 0x1f) / 31.0f;
1369             float_color->a = 1.0f;
1370             break;
1371
1372         case WINED3DFMT_B8G8R8_UNORM:
1373         case WINED3DFMT_B8G8R8X8_UNORM:
1374             float_color->r = D3DCOLOR_R(color);
1375             float_color->g = D3DCOLOR_G(color);
1376             float_color->b = D3DCOLOR_B(color);
1377             float_color->a = 1.0f;
1378             break;
1379
1380         case WINED3DFMT_B8G8R8A8_UNORM:
1381             float_color->r = D3DCOLOR_R(color);
1382             float_color->g = D3DCOLOR_G(color);
1383             float_color->b = D3DCOLOR_B(color);
1384             float_color->a = D3DCOLOR_A(color);
1385             break;
1386
1387         default:
1388             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1389             return FALSE;
1390     }
1391
1392     return TRUE;
1393 }
1394
1395 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1396 {
1397     const struct wined3d_format *format = surface->resource.format;
1398
1399     switch (format->id)
1400     {
1401         case WINED3DFMT_S1_UINT_D15_UNORM:
1402             *float_depth = depth / (float)0x00007fff;
1403             break;
1404
1405         case WINED3DFMT_D16_UNORM:
1406             *float_depth = depth / (float)0x0000ffff;
1407             break;
1408
1409         case WINED3DFMT_D24_UNORM_S8_UINT:
1410         case WINED3DFMT_X8D24_UNORM:
1411             *float_depth = depth / (float)0x00ffffff;
1412             break;
1413
1414         case WINED3DFMT_D32_UNORM:
1415             *float_depth = depth / (float)0xffffffff;
1416             break;
1417
1418         default:
1419             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1420             return FALSE;
1421     }
1422
1423     return TRUE;
1424 }
1425
1426 /* Do not call while under the GL lock. */
1427 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1428 {
1429     const struct wined3d_resource *resource = &surface->resource;
1430     struct wined3d_device *device = resource->device;
1431     const struct blit_shader *blitter;
1432
1433     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1434             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1435     if (!blitter)
1436     {
1437         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1438         return WINED3DERR_INVALIDCALL;
1439     }
1440
1441     return blitter->depth_fill(device, surface, rect, depth);
1442 }
1443
1444 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1445         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1446 {
1447     struct wined3d_device *device = src_surface->resource.device;
1448
1449     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1450             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1451             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1452         return WINED3DERR_INVALIDCALL;
1453
1454     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1455
1456     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1457             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1458     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1459
1460     return WINED3D_OK;
1461 }
1462
1463 /* Do not call while under the GL lock. */
1464 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1465         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1466         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1467 {
1468     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1469     struct wined3d_device *device = dst_surface->resource.device;
1470     DWORD src_ds_flags, dst_ds_flags;
1471     RECT src_rect, dst_rect;
1472
1473     static const DWORD simple_blit = WINEDDBLT_ASYNC
1474             | WINEDDBLT_COLORFILL
1475             | WINEDDBLT_WAIT
1476             | WINEDDBLT_DEPTHFILL
1477             | WINEDDBLT_DONOTWAIT;
1478
1479     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1480             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1481             flags, fx, debug_d3dtexturefiltertype(filter));
1482     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1483
1484     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1485     {
1486         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1487         return WINEDDERR_SURFACEBUSY;
1488     }
1489
1490     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1491
1492     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1493             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1494             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1495             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1496             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1497     {
1498         /* The destination rect can be out of bounds on the condition
1499          * that a clipper is set for the surface. */
1500         if (dst_surface->clipper)
1501             FIXME("Blit clipping not implemented.\n");
1502         else
1503             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1504         return WINEDDERR_INVALIDRECT;
1505     }
1506
1507     if (src_surface)
1508     {
1509         surface_get_rect(src_surface, src_rect_in, &src_rect);
1510
1511         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1512                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1513                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1514                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1515                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1516         {
1517             WARN("Application gave us bad source rectangle for Blt.\n");
1518             return WINEDDERR_INVALIDRECT;
1519         }
1520     }
1521     else
1522     {
1523         memset(&src_rect, 0, sizeof(src_rect));
1524     }
1525
1526     if (!fx || !(fx->dwDDFX))
1527         flags &= ~WINEDDBLT_DDFX;
1528
1529     if (flags & WINEDDBLT_WAIT)
1530         flags &= ~WINEDDBLT_WAIT;
1531
1532     if (flags & WINEDDBLT_ASYNC)
1533     {
1534         static unsigned int once;
1535
1536         if (!once++)
1537             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1538         flags &= ~WINEDDBLT_ASYNC;
1539     }
1540
1541     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1542     if (flags & WINEDDBLT_DONOTWAIT)
1543     {
1544         static unsigned int once;
1545
1546         if (!once++)
1547             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1548         flags &= ~WINEDDBLT_DONOTWAIT;
1549     }
1550
1551     if (!device->d3d_initialized)
1552     {
1553         WARN("D3D not initialized, using fallback.\n");
1554         goto cpu;
1555     }
1556
1557     /* We want to avoid invalidating the sysmem location for converted
1558      * surfaces, since otherwise we'd have to convert the data back when
1559      * locking them. */
1560     if (dst_surface->flags & SFLAG_CONVERTED)
1561     {
1562         WARN("Converted surface, using CPU blit.\n");
1563         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1564     }
1565
1566     if (flags & ~simple_blit)
1567     {
1568         WARN("Using fallback for complex blit (%#x).\n", flags);
1569         goto fallback;
1570     }
1571
1572     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1573         src_swapchain = src_surface->container.u.swapchain;
1574     else
1575         src_swapchain = NULL;
1576
1577     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1578         dst_swapchain = dst_surface->container.u.swapchain;
1579     else
1580         dst_swapchain = NULL;
1581
1582     /* This isn't strictly needed. FBO blits for example could deal with
1583      * cross-swapchain blits by first downloading the source to a texture
1584      * before switching to the destination context. We just have this here to
1585      * not have to deal with the issue, since cross-swapchain blits should be
1586      * rare. */
1587     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1588     {
1589         FIXME("Using fallback for cross-swapchain blit.\n");
1590         goto fallback;
1591     }
1592
1593     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1594     if (src_surface)
1595         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1596     else
1597         src_ds_flags = 0;
1598
1599     if (src_ds_flags || dst_ds_flags)
1600     {
1601         if (flags & WINEDDBLT_DEPTHFILL)
1602         {
1603             float depth;
1604
1605             TRACE("Depth fill.\n");
1606
1607             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1608                 return WINED3DERR_INVALIDCALL;
1609
1610             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1611                 return WINED3D_OK;
1612         }
1613         else
1614         {
1615             /* Accessing depth / stencil surfaces is supposed to fail while in
1616              * a scene, except for fills, which seem to work. */
1617             if (device->inScene)
1618             {
1619                 WARN("Rejecting depth / stencil access while in scene.\n");
1620                 return WINED3DERR_INVALIDCALL;
1621             }
1622
1623             if (src_ds_flags != dst_ds_flags)
1624             {
1625                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1626                 return WINED3DERR_INVALIDCALL;
1627             }
1628
1629             if (src_rect.top || src_rect.left
1630                     || src_rect.bottom != src_surface->resource.height
1631                     || src_rect.right != src_surface->resource.width)
1632             {
1633                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1634                         wine_dbgstr_rect(&src_rect));
1635                 return WINED3DERR_INVALIDCALL;
1636             }
1637
1638             if (dst_rect.top || dst_rect.left
1639                     || dst_rect.bottom != dst_surface->resource.height
1640                     || dst_rect.right != dst_surface->resource.width)
1641             {
1642                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1643                         wine_dbgstr_rect(&src_rect));
1644                 return WINED3DERR_INVALIDCALL;
1645             }
1646
1647             if (src_surface->resource.height != dst_surface->resource.height
1648                     || src_surface->resource.width != dst_surface->resource.width)
1649             {
1650                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1651                 return WINED3DERR_INVALIDCALL;
1652             }
1653
1654             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1655                 return WINED3D_OK;
1656         }
1657     }
1658     else
1659     {
1660         if (flags & WINEDDBLT_COLORFILL)
1661         {
1662             WINED3DCOLORVALUE color;
1663
1664             TRACE("Color fill.\n");
1665
1666             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1667                 goto fallback;
1668
1669             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1670                 return WINED3D_OK;
1671         }
1672         else
1673         {
1674             TRACE("Color blit.\n");
1675
1676             /* Use present for back -> front blits. The idea behind this is
1677              * that present is potentially faster than a blit, in particular
1678              * when FBO blits aren't available. Some ddraw applications like
1679              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1680              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1681              * applications can't blit directly to the frontbuffer. */
1682             if (dst_swapchain && dst_swapchain->back_buffers
1683                     && dst_surface == dst_swapchain->front_buffer
1684                     && src_surface == dst_swapchain->back_buffers[0])
1685             {
1686                 WINED3DSWAPEFFECT swap_effect = dst_swapchain->presentParms.SwapEffect;
1687
1688                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1689
1690                 /* Set the swap effect to COPY, we don't want the backbuffer
1691                  * to become undefined. */
1692                 dst_swapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
1693                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1694                 dst_swapchain->presentParms.SwapEffect = swap_effect;
1695
1696                 return WINED3D_OK;
1697             }
1698
1699             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1700                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1701                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1702             {
1703                 TRACE("Using FBO blit.\n");
1704
1705                 surface_blt_fbo(device, filter,
1706                         src_surface, src_surface->draw_binding, &src_rect,
1707                         dst_surface, dst_surface->draw_binding, &dst_rect);
1708                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1709                 return WINED3D_OK;
1710             }
1711
1712             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1713                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1714                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1715             {
1716                 TRACE("Using arbfp blit.\n");
1717
1718                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1719                     return WINED3D_OK;
1720             }
1721         }
1722     }
1723
1724 fallback:
1725
1726     /* Special cases for render targets. */
1727     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1728             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1729     {
1730         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1731                 src_surface, &src_rect, flags, fx, filter)))
1732             return WINED3D_OK;
1733     }
1734
1735 cpu:
1736
1737     /* For the rest call the X11 surface implementation. For render targets
1738      * this should be implemented OpenGL accelerated in BltOverride, other
1739      * blits are rather rare. */
1740     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1741 }
1742
1743 /* Do not call while under the GL lock. */
1744 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1745         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1746 {
1747     RECT src_rect, dst_rect;
1748     DWORD flags = 0;
1749
1750     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect_in %s, trans %#x.\n",
1751             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1752
1753     surface_get_rect(src_surface, src_rect_in, &src_rect);
1754
1755     dst_rect.left = dst_x;
1756     dst_rect.top = dst_y;
1757     dst_rect.right = dst_x + src_rect.right - src_rect.left;
1758     dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1759
1760     if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1761         flags |= WINEDDBLT_KEYSRC;
1762     if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1763         flags |= WINEDDBLT_KEYDEST;
1764     if (trans & WINEDDBLTFAST_WAIT)
1765         flags |= WINEDDBLT_WAIT;
1766     if (trans & WINEDDBLTFAST_DONOTWAIT)
1767         flags |= WINEDDBLT_DONOTWAIT;
1768
1769     return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT);
1770 }
1771
1772 /* Context activation is done by the caller. */
1773 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1774 {
1775     if (!surface->resource.heapMemory)
1776     {
1777         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1778         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1779                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1780     }
1781
1782     ENTER_GL();
1783     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1784     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1785     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1786             surface->resource.size, surface->resource.allocatedMemory));
1787     checkGLcall("glGetBufferSubDataARB");
1788     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1789     checkGLcall("glDeleteBuffersARB");
1790     LEAVE_GL();
1791
1792     surface->pbo = 0;
1793     surface->flags &= ~SFLAG_PBO;
1794 }
1795
1796 /* Do not call while under the GL lock. */
1797 static void surface_unload(struct wined3d_resource *resource)
1798 {
1799     struct wined3d_surface *surface = surface_from_resource(resource);
1800     struct wined3d_renderbuffer_entry *entry, *entry2;
1801     struct wined3d_device *device = resource->device;
1802     const struct wined3d_gl_info *gl_info;
1803     struct wined3d_context *context;
1804
1805     TRACE("surface %p.\n", surface);
1806
1807     if (resource->pool == WINED3DPOOL_DEFAULT)
1808     {
1809         /* Default pool resources are supposed to be destroyed before Reset is called.
1810          * Implicit resources stay however. So this means we have an implicit render target
1811          * or depth stencil. The content may be destroyed, but we still have to tear down
1812          * opengl resources, so we cannot leave early.
1813          *
1814          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1815          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1816          * or the depth stencil into an FBO the texture or render buffer will be removed
1817          * and all flags get lost
1818          */
1819         surface_init_sysmem(surface);
1820     }
1821     else
1822     {
1823         /* Load the surface into system memory */
1824         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1825         surface_modify_location(surface, surface->draw_binding, FALSE);
1826     }
1827     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1828     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1829     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1830
1831     context = context_acquire(device, NULL);
1832     gl_info = context->gl_info;
1833
1834     /* Destroy PBOs, but load them into real sysmem before */
1835     if (surface->flags & SFLAG_PBO)
1836         surface_remove_pbo(surface, gl_info);
1837
1838     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1839      * all application-created targets the application has to release the surface
1840      * before calling _Reset
1841      */
1842     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1843     {
1844         ENTER_GL();
1845         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1846         LEAVE_GL();
1847         list_remove(&entry->entry);
1848         HeapFree(GetProcessHeap(), 0, entry);
1849     }
1850     list_init(&surface->renderbuffers);
1851     surface->current_renderbuffer = NULL;
1852
1853     ENTER_GL();
1854
1855     /* If we're in a texture, the texture name belongs to the texture.
1856      * Otherwise, destroy it. */
1857     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1858     {
1859         glDeleteTextures(1, &surface->texture_name);
1860         surface->texture_name = 0;
1861         glDeleteTextures(1, &surface->texture_name_srgb);
1862         surface->texture_name_srgb = 0;
1863     }
1864     if (surface->rb_multisample)
1865     {
1866         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1867         surface->rb_multisample = 0;
1868     }
1869     if (surface->rb_resolved)
1870     {
1871         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1872         surface->rb_resolved = 0;
1873     }
1874
1875     LEAVE_GL();
1876
1877     context_release(context);
1878
1879     resource_unload(resource);
1880 }
1881
1882 static const struct wined3d_resource_ops surface_resource_ops =
1883 {
1884     surface_unload,
1885 };
1886
1887 static const struct wined3d_surface_ops surface_ops =
1888 {
1889     surface_private_setup,
1890     surface_cleanup,
1891     surface_realize_palette,
1892     surface_draw_overlay,
1893     surface_preload,
1894     surface_map,
1895     surface_unmap,
1896     surface_getdc,
1897 };
1898
1899 /*****************************************************************************
1900  * Initializes the GDI surface, aka creates the DIB section we render to
1901  * The DIB section creation is done by calling GetDC, which will create the
1902  * section and releasing the dc to allow the app to use it. The dib section
1903  * will stay until the surface is released
1904  *
1905  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1906  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1907  * avoid confusion in the shared surface code.
1908  *
1909  * Returns:
1910  *  WINED3D_OK on success
1911  *  The return values of called methods on failure
1912  *
1913  *****************************************************************************/
1914 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1915 {
1916     HRESULT hr;
1917
1918     TRACE("surface %p.\n", surface);
1919
1920     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1921     {
1922         ERR("Overlays not yet supported by GDI surfaces.\n");
1923         return WINED3DERR_INVALIDCALL;
1924     }
1925
1926     /* Sysmem textures have memory already allocated - release it,
1927      * this avoids an unnecessary memcpy. */
1928     hr = surface_create_dib_section(surface);
1929     if (SUCCEEDED(hr))
1930     {
1931         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1932         surface->resource.heapMemory = NULL;
1933         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1934     }
1935
1936     /* We don't mind the nonpow2 stuff in GDI. */
1937     surface->pow2Width = surface->resource.width;
1938     surface->pow2Height = surface->resource.height;
1939
1940     return WINED3D_OK;
1941 }
1942
1943 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1944 {
1945     TRACE("surface %p.\n", surface);
1946
1947     if (surface->flags & SFLAG_DIBSECTION)
1948     {
1949         /* Release the DC. */
1950         SelectObject(surface->hDC, surface->dib.holdbitmap);
1951         DeleteDC(surface->hDC);
1952         /* Release the DIB section. */
1953         DeleteObject(surface->dib.DIBsection);
1954         surface->dib.bitmap_data = NULL;
1955         surface->resource.allocatedMemory = NULL;
1956     }
1957
1958     if (surface->flags & SFLAG_USERPTR)
1959         wined3d_surface_set_mem(surface, NULL);
1960     if (surface->overlay_dest)
1961         list_remove(&surface->overlay_entry);
1962
1963     HeapFree(GetProcessHeap(), 0, surface->palette9);
1964
1965     resource_cleanup(&surface->resource);
1966 }
1967
1968 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1969 {
1970     struct wined3d_palette *palette = surface->palette;
1971
1972     TRACE("surface %p.\n", surface);
1973
1974     if (!palette) return;
1975
1976     if (surface->flags & SFLAG_DIBSECTION)
1977     {
1978         RGBQUAD col[256];
1979         unsigned int i;
1980
1981         TRACE("Updating the DC's palette.\n");
1982
1983         for (i = 0; i < 256; ++i)
1984         {
1985             col[i].rgbRed = palette->palents[i].peRed;
1986             col[i].rgbGreen = palette->palents[i].peGreen;
1987             col[i].rgbBlue = palette->palents[i].peBlue;
1988             col[i].rgbReserved = 0;
1989         }
1990         SetDIBColorTable(surface->hDC, 0, 256, col);
1991     }
1992
1993     /* Update the image because of the palette change. Some games like e.g.
1994      * Red Alert call SetEntries a lot to implement fading. */
1995     /* Tell the swapchain to update the screen. */
1996     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1997     {
1998         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1999         if (surface == swapchain->front_buffer)
2000         {
2001             x11_copy_to_screen(swapchain, NULL);
2002         }
2003     }
2004 }
2005
2006 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
2007 {
2008     FIXME("GDI surfaces can't draw overlays yet.\n");
2009     return E_FAIL;
2010 }
2011
/* Preloading is not supported for GDI surfaces; this only traces the call
 * and logs an error. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);
    ERR("Preloading GDI surfaces is not supported.\n");
}
2018
2019 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2020 {
2021     TRACE("surface %p, rect %s, flags %#x.\n",
2022             surface, wine_dbgstr_rect(rect), flags);
2023
2024     if (!surface->resource.allocatedMemory)
2025     {
2026         /* This happens on gdi surfaces if the application set a user pointer
2027          * and resets it. Recreate the DIB section. */
2028         surface_create_dib_section(surface);
2029         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2030     }
2031 }
2032
2033 static void gdi_surface_unmap(struct wined3d_surface *surface)
2034 {
2035     TRACE("surface %p.\n", surface);
2036
2037     /* Tell the swapchain to update the screen. */
2038     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2039     {
2040         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2041         if (surface == swapchain->front_buffer)
2042         {
2043             x11_copy_to_screen(swapchain, &surface->lockedRect);
2044         }
2045     }
2046
2047     memset(&surface->lockedRect, 0, sizeof(RECT));
2048 }
2049
2050 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
2051 {
2052     WINED3DLOCKED_RECT lock;
2053     HRESULT hr;
2054
2055     TRACE("surface %p.\n", surface);
2056
2057     /* Should have a DIB section already. */
2058     if (!(surface->flags & SFLAG_DIBSECTION))
2059     {
2060         WARN("DC not supported on this surface\n");
2061         return WINED3DERR_INVALIDCALL;
2062     }
2063
2064     /* Map the surface. */
2065     hr = wined3d_surface_map(surface, &lock, NULL, 0);
2066     if (FAILED(hr))
2067         ERR("Map failed, hr %#x.\n", hr);
2068
2069     return hr;
2070 }
2071
2072 static const struct wined3d_surface_ops gdi_surface_ops =
2073 {
2074     gdi_surface_private_setup,
2075     surface_gdi_cleanup,
2076     gdi_surface_realize_palette,
2077     gdi_surface_draw_overlay,
2078     gdi_surface_preload,
2079     gdi_surface_map,
2080     gdi_surface_unmap,
2081     gdi_surface_getdc,
2082 };
2083
2084 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2085 {
2086     GLuint *name;
2087     DWORD flag;
2088
2089     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2090
2091     if(srgb)
2092     {
2093         name = &surface->texture_name_srgb;
2094         flag = SFLAG_INSRGBTEX;
2095     }
2096     else
2097     {
2098         name = &surface->texture_name;
2099         flag = SFLAG_INTEXTURE;
2100     }
2101
2102     if (!*name && new_name)
2103     {
2104         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2105          * surface has no texture name yet. See if we can get rid of this. */
2106         if (surface->flags & flag)
2107             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2108         surface_modify_location(surface, flag, FALSE);
2109     }
2110
2111     *name = new_name;
2112     surface_force_reload(surface);
2113 }
2114
2115 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2116 {
2117     TRACE("surface %p, target %#x.\n", surface, target);
2118
2119     if (surface->texture_target != target)
2120     {
2121         if (target == GL_TEXTURE_RECTANGLE_ARB)
2122         {
2123             surface->flags &= ~SFLAG_NORMCOORD;
2124         }
2125         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2126         {
2127             surface->flags |= SFLAG_NORMCOORD;
2128         }
2129     }
2130     surface->texture_target = target;
2131     surface_force_reload(surface);
2132 }
2133
2134 /* Context activation is done by the caller. */
2135 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2136 {
2137     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2138
2139     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2140     {
2141         struct wined3d_texture *texture = surface->container.u.texture;
2142
2143         TRACE("Passing to container (%p).\n", texture);
2144         texture->texture_ops->texture_bind(texture, context, srgb);
2145     }
2146     else
2147     {
2148         if (surface->texture_level)
2149         {
2150             ERR("Standalone surface %p is non-zero texture level %u.\n",
2151                     surface, surface->texture_level);
2152         }
2153
2154         if (srgb)
2155             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2156
2157         ENTER_GL();
2158
2159         if (!surface->texture_name)
2160         {
2161             glGenTextures(1, &surface->texture_name);
2162             checkGLcall("glGenTextures");
2163
2164             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2165
2166             context_bind_texture(context, surface->texture_target, surface->texture_name);
2167             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2168             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2169             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2170             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2171             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2172             checkGLcall("glTexParameteri");
2173         }
2174         else
2175         {
2176             context_bind_texture(context, surface->texture_target, surface->texture_name);
2177         }
2178
2179         LEAVE_GL();
2180     }
2181 }
2182
2183 /* This call just downloads data, the caller is responsible for binding the
2184  * correct texture. */
2185 /* Context activation is done by the caller. */
2186 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2187 {
2188     const struct wined3d_format *format = surface->resource.format;
2189
2190     /* Only support read back of converted P8 surfaces. */
2191     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2192     {
2193         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2194         return;
2195     }
2196
2197     ENTER_GL();
2198
2199     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2200     {
2201         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2202                 surface, surface->texture_level, format->glFormat, format->glType,
2203                 surface->resource.allocatedMemory);
2204
2205         if (surface->flags & SFLAG_PBO)
2206         {
2207             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2208             checkGLcall("glBindBufferARB");
2209             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2210             checkGLcall("glGetCompressedTexImageARB");
2211             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2212             checkGLcall("glBindBufferARB");
2213         }
2214         else
2215         {
2216             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2217                     surface->texture_level, surface->resource.allocatedMemory));
2218             checkGLcall("glGetCompressedTexImageARB");
2219         }
2220
2221         LEAVE_GL();
2222     }
2223     else
2224     {
2225         void *mem;
2226         GLenum gl_format = format->glFormat;
2227         GLenum gl_type = format->glType;
2228         int src_pitch = 0;
2229         int dst_pitch = 0;
2230
2231         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2232         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2233         {
2234             gl_format = GL_ALPHA;
2235             gl_type = GL_UNSIGNED_BYTE;
2236         }
2237
2238         if (surface->flags & SFLAG_NONPOW2)
2239         {
2240             unsigned char alignment = surface->resource.device->surface_alignment;
2241             src_pitch = format->byte_count * surface->pow2Width;
2242             dst_pitch = wined3d_surface_get_pitch(surface);
2243             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2244             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2245         }
2246         else
2247         {
2248             mem = surface->resource.allocatedMemory;
2249         }
2250
2251         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2252                 surface, surface->texture_level, gl_format, gl_type, mem);
2253
2254         if (surface->flags & SFLAG_PBO)
2255         {
2256             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2257             checkGLcall("glBindBufferARB");
2258
2259             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2260             checkGLcall("glGetTexImage");
2261
2262             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2263             checkGLcall("glBindBufferARB");
2264         }
2265         else
2266         {
2267             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2268             checkGLcall("glGetTexImage");
2269         }
2270         LEAVE_GL();
2271
2272         if (surface->flags & SFLAG_NONPOW2)
2273         {
2274             const BYTE *src_data;
2275             BYTE *dst_data;
2276             UINT y;
2277             /*
2278              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2279              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2280              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2281              *
2282              * We're doing this...
2283              *
2284              * instead of boxing the texture :
2285              * |<-texture width ->|  -->pow2width|   /\
2286              * |111111111111111111|              |   |
2287              * |222 Texture 222222| boxed empty  | texture height
2288              * |3333 Data 33333333|              |   |
2289              * |444444444444444444|              |   \/
2290              * -----------------------------------   |
2291              * |     boxed  empty | boxed empty  | pow2height
2292              * |                  |              |   \/
2293              * -----------------------------------
2294              *
2295              *
2296              * we're repacking the data to the expected texture width
2297              *
2298              * |<-texture width ->|  -->pow2width|   /\
2299              * |111111111111111111222222222222222|   |
2300              * |222333333333333333333444444444444| texture height
2301              * |444444                           |   |
2302              * |                                 |   \/
2303              * |                                 |   |
2304              * |            empty                | pow2height
2305              * |                                 |   \/
2306              * -----------------------------------
2307              *
2308              * == is the same as
2309              *
2310              * |<-texture width ->|    /\
2311              * |111111111111111111|
2312              * |222222222222222222|texture height
2313              * |333333333333333333|
2314              * |444444444444444444|    \/
2315              * --------------------
2316              *
2317              * this also means that any references to allocatedMemory should work with the data as if were a
2318              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2319              *
2320              * internally the texture is still stored in a boxed format so any references to textureName will
2321              * get a boxed texture with width pow2width and not a texture of width resource.width.
2322              *
2323              * Performance should not be an issue, because applications normally do not lock the surfaces when
2324              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2325              * and doesn't have to be re-read. */
2326             src_data = mem;
2327             dst_data = surface->resource.allocatedMemory;
2328             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2329             for (y = 1; y < surface->resource.height; ++y)
2330             {
2331                 /* skip the first row */
2332                 src_data += src_pitch;
2333                 dst_data += dst_pitch;
2334                 memcpy(dst_data, src_data, dst_pitch);
2335             }
2336
2337             HeapFree(GetProcessHeap(), 0, mem);
2338         }
2339     }
2340
2341     /* Surface has now been downloaded */
2342     surface->flags |= SFLAG_INSYSMEM;
2343 }
2344
2345 /* This call just uploads data, the caller is responsible for binding the
2346  * correct texture. */
2347 /* Context activation is done by the caller. */
2348 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2349         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2350         BOOL srgb, const struct wined3d_bo_address *data)
2351 {
2352     UINT update_w = src_rect->right - src_rect->left;
2353     UINT update_h = src_rect->bottom - src_rect->top;
2354
2355     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2356             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2357             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2358
2359     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2360         update_h *= format->heightscale;
2361
2362     ENTER_GL();
2363
2364     if (data->buffer_object)
2365     {
2366         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2367         checkGLcall("glBindBufferARB");
2368     }
2369
2370     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2371     {
2372         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2373         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2374         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2375         const BYTE *addr = data->addr;
2376         GLenum internal;
2377
2378         addr += (src_rect->top / format->block_height) * src_pitch;
2379         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2380
2381         if (srgb)
2382             internal = format->glGammaInternal;
2383         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2384             internal = format->rtInternal;
2385         else
2386             internal = format->glInternal;
2387
2388         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2389                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2390                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2391
2392         if (row_length == src_pitch)
2393         {
2394             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2395                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2396         }
2397         else
2398         {
2399             UINT row, y;
2400
2401             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2402              * can't use the unpack row length like below. */
2403             for (row = 0, y = dst_point->y; row < row_count; ++row)
2404             {
2405                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2406                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2407                 y += format->block_height;
2408                 addr += src_pitch;
2409             }
2410         }
2411         checkGLcall("glCompressedTexSubImage2DARB");
2412     }
2413     else
2414     {
2415         const BYTE *addr = data->addr;
2416
2417         addr += src_rect->top * src_w * format->byte_count;
2418         addr += src_rect->left * format->byte_count;
2419
2420         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2421                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2422                 update_w, update_h, format->glFormat, format->glType, addr);
2423
2424         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2425         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2426                 update_w, update_h, format->glFormat, format->glType, addr);
2427         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2428         checkGLcall("glTexSubImage2D");
2429     }
2430
2431     if (data->buffer_object)
2432     {
2433         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2434         checkGLcall("glBindBufferARB");
2435     }
2436
2437     LEAVE_GL();
2438
2439     if (wined3d_settings.strict_draw_ordering)
2440         wglFlush();
2441
2442     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2443     {
2444         struct wined3d_device *device = surface->resource.device;
2445         unsigned int i;
2446
2447         for (i = 0; i < device->context_count; ++i)
2448         {
2449             context_surface_update(device->contexts[i], surface);
2450         }
2451     }
2452 }
2453
2454 /* This call just allocates the texture, the caller is responsible for binding
2455  * the correct texture. */
2456 /* Context activation is done by the caller. */
2457 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2458         const struct wined3d_format *format, BOOL srgb)
2459 {
2460     BOOL enable_client_storage = FALSE;
2461     GLsizei width = surface->pow2Width;
2462     GLsizei height = surface->pow2Height;
2463     const BYTE *mem = NULL;
2464     GLenum internal;
2465
2466     if (srgb)
2467     {
2468         internal = format->glGammaInternal;
2469     }
2470     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2471     {
2472         internal = format->rtInternal;
2473     }
2474     else
2475     {
2476         internal = format->glInternal;
2477     }
2478
2479     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2480
2481     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2482             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2483             internal, width, height, format->glFormat, format->glType);
2484
2485     ENTER_GL();
2486
2487     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2488     {
2489         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2490                 || !surface->resource.allocatedMemory)
2491         {
2492             /* In some cases we want to disable client storage.
2493              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2494              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2495              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2496              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2497              */
2498             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2499             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2500             surface->flags &= ~SFLAG_CLIENT;
2501             enable_client_storage = TRUE;
2502         }
2503         else
2504         {
2505             surface->flags |= SFLAG_CLIENT;
2506
2507             /* Point OpenGL to our allocated texture memory. Do not use
2508              * resource.allocatedMemory here because it might point into a
2509              * PBO. Instead use heapMemory, but get the alignment right. */
2510             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2511                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2512         }
2513     }
2514
2515     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2516     {
2517         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2518                 internal, width, height, 0, surface->resource.size, mem));
2519         checkGLcall("glCompressedTexImage2DARB");
2520     }
2521     else
2522     {
2523         glTexImage2D(surface->texture_target, surface->texture_level,
2524                 internal, width, height, 0, format->glFormat, format->glType, mem);
2525         checkGLcall("glTexImage2D");
2526     }
2527
2528     if(enable_client_storage) {
2529         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2530         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2531     }
2532     LEAVE_GL();
2533 }
2534
2535 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2536  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2537 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2538 /* GL locking is done by the caller */
2539 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2540 {
2541     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2542     struct wined3d_renderbuffer_entry *entry;
2543     GLuint renderbuffer = 0;
2544     unsigned int src_width, src_height;
2545     unsigned int width, height;
2546
2547     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2548     {
2549         width = rt->pow2Width;
2550         height = rt->pow2Height;
2551     }
2552     else
2553     {
2554         width = surface->pow2Width;
2555         height = surface->pow2Height;
2556     }
2557
2558     src_width = surface->pow2Width;
2559     src_height = surface->pow2Height;
2560
2561     /* A depth stencil smaller than the render target is not valid */
2562     if (width > src_width || height > src_height) return;
2563
2564     /* Remove any renderbuffer set if the sizes match */
2565     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2566             || (width == src_width && height == src_height))
2567     {
2568         surface->current_renderbuffer = NULL;
2569         return;
2570     }
2571
2572     /* Look if we've already got a renderbuffer of the correct dimensions */
2573     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2574     {
2575         if (entry->width == width && entry->height == height)
2576         {
2577             renderbuffer = entry->id;
2578             surface->current_renderbuffer = entry;
2579             break;
2580         }
2581     }
2582
2583     if (!renderbuffer)
2584     {
2585         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2586         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2587         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2588                 surface->resource.format->glInternal, width, height);
2589
2590         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2591         entry->width = width;
2592         entry->height = height;
2593         entry->id = renderbuffer;
2594         list_add_head(&surface->renderbuffers, &entry->entry);
2595
2596         surface->current_renderbuffer = entry;
2597     }
2598
2599     checkGLcall("set_compatible_renderbuffer");
2600 }
2601
2602 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2603 {
2604     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2605
2606     TRACE("surface %p.\n", surface);
2607
2608     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2609     {
2610         ERR("Surface %p is not on a swapchain.\n", surface);
2611         return GL_NONE;
2612     }
2613
2614     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2615     {
2616         if (swapchain->render_to_fbo)
2617         {
2618             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2619             return GL_COLOR_ATTACHMENT0;
2620         }
2621         TRACE("Returning GL_BACK\n");
2622         return GL_BACK;
2623     }
2624     else if (surface == swapchain->front_buffer)
2625     {
2626         TRACE("Returning GL_FRONT\n");
2627         return GL_FRONT;
2628     }
2629
2630     FIXME("Higher back buffer, returning GL_BACK\n");
2631     return GL_BACK;
2632 }
2633
2634 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2635 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2636 {
2637     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2638
2639     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2640         /* No partial locking for textures yet. */
2641         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2642
2643     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2644     if (dirty_rect)
2645     {
2646         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2647         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2648         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2649         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2650     }
2651     else
2652     {
2653         surface->dirtyRect.left = 0;
2654         surface->dirtyRect.top = 0;
2655         surface->dirtyRect.right = surface->resource.width;
2656         surface->dirtyRect.bottom = surface->resource.height;
2657     }
2658
2659     /* if the container is a texture then mark it dirty. */
2660     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2661     {
2662         TRACE("Passing to container.\n");
2663         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2664     }
2665 }
2666
2667 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2668 {
2669     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2670     BOOL ck_changed;
2671
2672     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2673
2674     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2675     {
2676         ERR("Not supported on scratch surfaces.\n");
2677         return WINED3DERR_INVALIDCALL;
2678     }
2679
2680     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2681
2682     /* Reload if either the texture and sysmem have different ideas about the
2683      * color key, or the actual key values changed. */
2684     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2685             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2686             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2687     {
2688         TRACE("Reloading because of color keying\n");
2689         /* To perform the color key conversion we need a sysmem copy of
2690          * the surface. Make sure we have it. */
2691
2692         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2693         /* Make sure the texture is reloaded because of the color key change,
2694          * this kills performance though :( */
2695         /* TODO: This is not necessarily needed with hw palettized texture support. */
2696         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2697         /* Switching color keying on / off may change the internal format. */
2698         if (ck_changed)
2699             surface_force_reload(surface);
2700     }
2701     else if (!(surface->flags & flag))
2702     {
2703         TRACE("Reloading because surface is dirty.\n");
2704     }
2705     else
2706     {
2707         TRACE("surface is already in texture\n");
2708         return WINED3D_OK;
2709     }
2710
2711     /* No partial locking for textures yet. */
2712     surface_load_location(surface, flag, NULL);
2713     surface_evict_sysmem(surface);
2714
2715     return WINED3D_OK;
2716 }
2717
/* See also float_16_to_32() in wined3d_private.h */
/* Converts a 32 bit float to the bit pattern of a 16 bit (half precision)
 * float: 1 sign bit, 5 exponent bits (excess 15), 10 mantissa bits.
 *
 * Zero of either sign is returned as 0x0000, NaN as 0x7c01 and infinities as
 * 0x7c00 / 0xfc00. Values too large for a half float become infinity, values
 * too small become denormals or zero. */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp;
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Work on the magnitude, the sign is added back at the end. */
    tmp = *in < 0.0f ? -*in : *in;

    /* Scale the value into [2^10, 2^11), so that its integer part is the
     * 11 bit mantissa including the implicit leading 1. */
    if (tmp < 1024.0f)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < 1024.0f);
    }
    else if (tmp >= 2048.0f)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= 2048.0f);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 bit mantissa (2047.5 -> 2048).
     * Renormalize, otherwise masking with 0x3ff below would drop the carry
     * and the result would be half of what it should be. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2780
2781 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2782 {
2783     ULONG refcount;
2784
2785     TRACE("Surface %p, container %p of type %#x.\n",
2786             surface, surface->container.u.base, surface->container.type);
2787
2788     switch (surface->container.type)
2789     {
2790         case WINED3D_CONTAINER_TEXTURE:
2791             return wined3d_texture_incref(surface->container.u.texture);
2792
2793         case WINED3D_CONTAINER_SWAPCHAIN:
2794             return wined3d_swapchain_incref(surface->container.u.swapchain);
2795
2796         default:
2797             ERR("Unhandled container type %#x.\n", surface->container.type);
2798         case WINED3D_CONTAINER_NONE:
2799             break;
2800     }
2801
2802     refcount = InterlockedIncrement(&surface->resource.ref);
2803     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2804
2805     return refcount;
2806 }
2807
2808 /* Do not call while under the GL lock. */
2809 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2810 {
2811     ULONG refcount;
2812
2813     TRACE("Surface %p, container %p of type %#x.\n",
2814             surface, surface->container.u.base, surface->container.type);
2815
2816     switch (surface->container.type)
2817     {
2818         case WINED3D_CONTAINER_TEXTURE:
2819             return wined3d_texture_decref(surface->container.u.texture);
2820
2821         case WINED3D_CONTAINER_SWAPCHAIN:
2822             return wined3d_swapchain_decref(surface->container.u.swapchain);
2823
2824         default:
2825             ERR("Unhandled container type %#x.\n", surface->container.type);
2826         case WINED3D_CONTAINER_NONE:
2827             break;
2828     }
2829
2830     refcount = InterlockedDecrement(&surface->resource.ref);
2831     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2832
2833     if (!refcount)
2834     {
2835         surface->surface_ops->surface_cleanup(surface);
2836         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2837
2838         TRACE("Destroyed surface %p.\n", surface);
2839         HeapFree(GetProcessHeap(), 0, surface);
2840     }
2841
2842     return refcount;
2843 }
2844
2845 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2846 {
2847     return resource_set_priority(&surface->resource, priority);
2848 }
2849
2850 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2851 {
2852     return resource_get_priority(&surface->resource);
2853 }
2854
2855 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2856 {
2857     TRACE("surface %p.\n", surface);
2858
2859     surface->surface_ops->surface_preload(surface);
2860 }
2861
2862 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2863 {
2864     TRACE("surface %p.\n", surface);
2865
2866     return surface->resource.parent;
2867 }
2868
2869 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2870 {
2871     TRACE("surface %p.\n", surface);
2872
2873     return &surface->resource;
2874 }
2875
2876 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2877 {
2878     TRACE("surface %p, flags %#x.\n", surface, flags);
2879
2880     switch (flags)
2881     {
2882         case WINEDDGBS_CANBLT:
2883         case WINEDDGBS_ISBLTDONE:
2884             return WINED3D_OK;
2885
2886         default:
2887             return WINED3DERR_INVALIDCALL;
2888     }
2889 }
2890
2891 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2892 {
2893     TRACE("surface %p, flags %#x.\n", surface, flags);
2894
2895     /* XXX: DDERR_INVALIDSURFACETYPE */
2896
2897     switch (flags)
2898     {
2899         case WINEDDGFS_CANFLIP:
2900         case WINEDDGFS_ISFLIPDONE:
2901             return WINED3D_OK;
2902
2903         default:
2904             return WINED3DERR_INVALIDCALL;
2905     }
2906 }
2907
2908 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2909 {
2910     TRACE("surface %p.\n", surface);
2911
2912     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2913     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2914 }
2915
2916 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2917 {
2918     TRACE("surface %p.\n", surface);
2919
2920     /* So far we don't lose anything :) */
2921     surface->flags &= ~SFLAG_LOST;
2922     return WINED3D_OK;
2923 }
2924
2925 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2926 {
2927     TRACE("surface %p, palette %p.\n", surface, palette);
2928
2929     if (surface->palette == palette)
2930     {
2931         TRACE("Nop palette change.\n");
2932         return WINED3D_OK;
2933     }
2934
2935     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2936         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2937
2938     surface->palette = palette;
2939
2940     if (palette)
2941     {
2942         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2943             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2944
2945         surface->surface_ops->surface_realize_palette(surface);
2946     }
2947
2948     return WINED3D_OK;
2949 }
2950
2951 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2952         DWORD flags, const WINEDDCOLORKEY *color_key)
2953 {
2954     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2955
2956     if (flags & WINEDDCKEY_COLORSPACE)
2957     {
2958         FIXME(" colorkey value not supported (%08x) !\n", flags);
2959         return WINED3DERR_INVALIDCALL;
2960     }
2961
2962     /* Dirtify the surface, but only if a key was changed. */
2963     if (color_key)
2964     {
2965         switch (flags & ~WINEDDCKEY_COLORSPACE)
2966         {
2967             case WINEDDCKEY_DESTBLT:
2968                 surface->DestBltCKey = *color_key;
2969                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
2970                 break;
2971
2972             case WINEDDCKEY_DESTOVERLAY:
2973                 surface->DestOverlayCKey = *color_key;
2974                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
2975                 break;
2976
2977             case WINEDDCKEY_SRCOVERLAY:
2978                 surface->SrcOverlayCKey = *color_key;
2979                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
2980                 break;
2981
2982             case WINEDDCKEY_SRCBLT:
2983                 surface->SrcBltCKey = *color_key;
2984                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
2985                 break;
2986         }
2987     }
2988     else
2989     {
2990         switch (flags & ~WINEDDCKEY_COLORSPACE)
2991         {
2992             case WINEDDCKEY_DESTBLT:
2993                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
2994                 break;
2995
2996             case WINEDDCKEY_DESTOVERLAY:
2997                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
2998                 break;
2999
3000             case WINEDDCKEY_SRCOVERLAY:
3001                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3002                 break;
3003
3004             case WINEDDCKEY_SRCBLT:
3005                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3006                 break;
3007         }
3008     }
3009
3010     return WINED3D_OK;
3011 }
3012
3013 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3014 {
3015     TRACE("surface %p.\n", surface);
3016
3017     return surface->palette;
3018 }
3019
3020 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3021 {
3022     const struct wined3d_format *format = surface->resource.format;
3023     DWORD pitch;
3024
3025     TRACE("surface %p.\n", surface);
3026
3027     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3028     {
3029         /* Since compressed formats are block based, pitch means the amount of
3030          * bytes to the next row of block rather than the next row of pixels. */
3031         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3032         pitch = row_block_count * format->block_byte_count;
3033     }
3034     else
3035     {
3036         unsigned char alignment = surface->resource.device->surface_alignment;
3037         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3038         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3039     }
3040
3041     TRACE("Returning %u.\n", pitch);
3042
3043     return pitch;
3044 }
3045
3046 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3047 {
3048     TRACE("surface %p, mem %p.\n", surface, mem);
3049
3050     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3051     {
3052         WARN("Surface is locked or the DC is in use.\n");
3053         return WINED3DERR_INVALIDCALL;
3054     }
3055
3056     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3057     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3058     {
3059         ERR("Not supported on render targets.\n");
3060         return WINED3DERR_INVALIDCALL;
3061     }
3062
3063     if (mem && mem != surface->resource.allocatedMemory)
3064     {
3065         void *release = NULL;
3066
3067         /* Do I have to copy the old surface content? */
3068         if (surface->flags & SFLAG_DIBSECTION)
3069         {
3070             SelectObject(surface->hDC, surface->dib.holdbitmap);
3071             DeleteDC(surface->hDC);
3072             /* Release the DIB section. */
3073             DeleteObject(surface->dib.DIBsection);
3074             surface->dib.bitmap_data = NULL;
3075             surface->resource.allocatedMemory = NULL;
3076             surface->hDC = NULL;
3077             surface->flags &= ~SFLAG_DIBSECTION;
3078         }
3079         else if (!(surface->flags & SFLAG_USERPTR))
3080         {
3081             release = surface->resource.heapMemory;
3082             surface->resource.heapMemory = NULL;
3083         }
3084         surface->resource.allocatedMemory = mem;
3085         surface->flags |= SFLAG_USERPTR;
3086
3087         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3088         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3089
3090         /* For client textures OpenGL has to be notified. */
3091         if (surface->flags & SFLAG_CLIENT)
3092             surface_release_client_storage(surface);
3093
3094         /* Now free the old memory if any. */
3095         HeapFree(GetProcessHeap(), 0, release);
3096     }
3097     else if (surface->flags & SFLAG_USERPTR)
3098     {
3099         /* HeapMemory should be NULL already. */
3100         if (surface->resource.heapMemory)
3101             ERR("User pointer surface has heap memory allocated.\n");
3102
3103         if (!mem)
3104         {
3105             surface->resource.allocatedMemory = NULL;
3106             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3107
3108             if (surface->flags & SFLAG_CLIENT)
3109                 surface_release_client_storage(surface);
3110
3111             surface_prepare_system_memory(surface);
3112         }
3113
3114         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3115     }
3116
3117     return WINED3D_OK;
3118 }
3119
3120 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3121 {
3122     LONG w, h;
3123
3124     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3125
3126     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3127     {
3128         WARN("Not an overlay surface.\n");
3129         return WINEDDERR_NOTAOVERLAYSURFACE;
3130     }
3131
3132     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3133     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3134     surface->overlay_destrect.left = x;
3135     surface->overlay_destrect.top = y;
3136     surface->overlay_destrect.right = x + w;
3137     surface->overlay_destrect.bottom = y + h;
3138
3139     surface->surface_ops->surface_draw_overlay(surface);
3140
3141     return WINED3D_OK;
3142 }
3143
3144 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3145 {
3146     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3147
3148     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3149     {
3150         TRACE("Not an overlay surface.\n");
3151         return WINEDDERR_NOTAOVERLAYSURFACE;
3152     }
3153
3154     if (!surface->overlay_dest)
3155     {
3156         TRACE("Overlay not visible.\n");
3157         *x = 0;
3158         *y = 0;
3159         return WINEDDERR_OVERLAYNOTVISIBLE;
3160     }
3161
3162     *x = surface->overlay_destrect.left;
3163     *y = surface->overlay_destrect.top;
3164
3165     TRACE("Returning position %d, %d.\n", *x, *y);
3166
3167     return WINED3D_OK;
3168 }
3169
3170 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3171         DWORD flags, struct wined3d_surface *ref)
3172 {
3173     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3174
3175     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3176     {
3177         TRACE("Not an overlay surface.\n");
3178         return WINEDDERR_NOTAOVERLAYSURFACE;
3179     }
3180
3181     return WINED3D_OK;
3182 }
3183
3184 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3185         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3186 {
3187     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3188             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3189
3190     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3191     {
3192         WARN("Not an overlay surface.\n");
3193         return WINEDDERR_NOTAOVERLAYSURFACE;
3194     }
3195     else if (!dst_surface)
3196     {
3197         WARN("Dest surface is NULL.\n");
3198         return WINED3DERR_INVALIDCALL;
3199     }
3200
3201     if (src_rect)
3202     {
3203         surface->overlay_srcrect = *src_rect;
3204     }
3205     else
3206     {
3207         surface->overlay_srcrect.left = 0;
3208         surface->overlay_srcrect.top = 0;
3209         surface->overlay_srcrect.right = surface->resource.width;
3210         surface->overlay_srcrect.bottom = surface->resource.height;
3211     }
3212
3213     if (dst_rect)
3214     {
3215         surface->overlay_destrect = *dst_rect;
3216     }
3217     else
3218     {
3219         surface->overlay_destrect.left = 0;
3220         surface->overlay_destrect.top = 0;
3221         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3222         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3223     }
3224
3225     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3226     {
3227         list_remove(&surface->overlay_entry);
3228     }
3229
3230     if (flags & WINEDDOVER_SHOW)
3231     {
3232         if (surface->overlay_dest != dst_surface)
3233         {
3234             surface->overlay_dest = dst_surface;
3235             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3236         }
3237     }
3238     else if (flags & WINEDDOVER_HIDE)
3239     {
3240         /* tests show that the rectangles are erased on hide */
3241         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3242         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3243         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3244         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3245         surface->overlay_dest = NULL;
3246     }
3247
3248     surface->surface_ops->surface_draw_overlay(surface);
3249
3250     return WINED3D_OK;
3251 }
3252
3253 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3254 {
3255     TRACE("surface %p, clipper %p.\n", surface, clipper);
3256
3257     surface->clipper = clipper;
3258
3259     return WINED3D_OK;
3260 }
3261
3262 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3263 {
3264     TRACE("surface %p.\n", surface);
3265
3266     return surface->clipper;
3267 }
3268
3269 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3270 {
3271     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3272
3273     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3274
3275     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3276     {
3277         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3278         return WINED3DERR_INVALIDCALL;
3279     }
3280
3281     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3282             surface->pow2Width, surface->pow2Height);
3283     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3284     surface->resource.format = format;
3285
3286     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3287     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3288             format->glFormat, format->glInternal, format->glType);
3289
3290     return WINED3D_OK;
3291 }
3292
3293 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3294         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3295 {
3296     unsigned short *dst_s;
3297     const float *src_f;
3298     unsigned int x, y;
3299
3300     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3301
3302     for (y = 0; y < h; ++y)
3303     {
3304         src_f = (const float *)(src + y * pitch_in);
3305         dst_s = (unsigned short *) (dst + y * pitch_out);
3306         for (x = 0; x < w; ++x)
3307         {
3308             dst_s[x] = float_32_to_16(src_f + x);
3309         }
3310     }
3311 }
3312
3313 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3314         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3315 {
3316     static const unsigned char convert_5to8[] =
3317     {
3318         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3319         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3320         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3321         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3322     };
3323     static const unsigned char convert_6to8[] =
3324     {
3325         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3326         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3327         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3328         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3329         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3330         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3331         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3332         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3333     };
3334     unsigned int x, y;
3335
3336     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3337
3338     for (y = 0; y < h; ++y)
3339     {
3340         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3341         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3342         for (x = 0; x < w; ++x)
3343         {
3344             WORD pixel = src_line[x];
3345             dst_line[x] = 0xff000000
3346                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3347                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3348                     | convert_5to8[(pixel & 0x001f)];
3349         }
3350     }
3351 }
3352
3353 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3354  * in both cases we're just setting the X / Alpha channel to 0xff. */
3355 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3356         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3357 {
3358     unsigned int x, y;
3359
3360     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3361
3362     for (y = 0; y < h; ++y)
3363     {
3364         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3365         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3366
3367         for (x = 0; x < w; ++x)
3368         {
3369             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3370         }
3371     }
3372 }
3373
3374 static inline BYTE cliptobyte(int x)
3375 {
3376     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3377 }
3378
3379 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3380         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3381 {
3382     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3383     unsigned int x, y;
3384
3385     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3386
3387     for (y = 0; y < h; ++y)
3388     {
3389         const BYTE *src_line = src + y * pitch_in;
3390         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3391         for (x = 0; x < w; ++x)
3392         {
3393             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3394              *     C = Y - 16; D = U - 128; E = V - 128;
3395              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3396              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3397              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3398              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3399              * U and V are shared between the pixels. */
3400             if (!(x & 1)) /* For every even pixel, read new U and V. */
3401             {
3402                 d = (int) src_line[1] - 128;
3403                 e = (int) src_line[3] - 128;
3404                 r2 = 409 * e + 128;
3405                 g2 = - 100 * d - 208 * e + 128;
3406                 b2 = 516 * d + 128;
3407             }
3408             c2 = 298 * ((int) src_line[0] - 16);
3409             dst_line[x] = 0xff000000
3410                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3411                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3412                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3413                 /* Scale RGB values to 0..255 range,
3414                  * then clip them if still not in range (may be negative),
3415                  * then shift them within DWORD if necessary. */
3416             src_line += 2;
3417         }
3418     }
3419 }
3420
3421 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3422         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3423 {
3424     unsigned int x, y;
3425     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3426
3427     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3428
3429     for (y = 0; y < h; ++y)
3430     {
3431         const BYTE *src_line = src + y * pitch_in;
3432         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3433         for (x = 0; x < w; ++x)
3434         {
3435             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3436              *     C = Y - 16; D = U - 128; E = V - 128;
3437              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3438              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3439              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3440              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3441              * U and V are shared between the pixels. */
3442             if (!(x & 1)) /* For every even pixel, read new U and V. */
3443             {
3444                 d = (int) src_line[1] - 128;
3445                 e = (int) src_line[3] - 128;
3446                 r2 = 409 * e + 128;
3447                 g2 = - 100 * d - 208 * e + 128;
3448                 b2 = 516 * d + 128;
3449             }
3450             c2 = 298 * ((int) src_line[0] - 16);
3451             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3452                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3453                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3454                 /* Scale RGB values to 0..255 range,
3455                  * then clip them if still not in range (may be negative),
3456                  * then shift them within DWORD if necessary. */
3457             src_line += 2;
3458         }
3459     }
3460 }
3461
3462 struct d3dfmt_convertor_desc
3463 {
3464     enum wined3d_format_id from, to;
3465     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3466 };
3467
3468 static const struct d3dfmt_convertor_desc convertors[] =
3469 {
3470     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3471     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3472     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3473     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3474     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3475     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3476 };
3477
3478 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3479         enum wined3d_format_id to)
3480 {
3481     unsigned int i;
3482
3483     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3484     {
3485         if (convertors[i].from == from && convertors[i].to == to)
3486             return &convertors[i];
3487     }
3488
3489     return NULL;
3490 }
3491
3492 /*****************************************************************************
3493  * surface_convert_format
3494  *
3495  * Creates a duplicate of a surface in a different format. Is used by Blt to
3496  * blit between surfaces with different formats.
3497  *
3498  * Parameters
3499  *  source: Source surface
3500  *  fmt: Requested destination format
3501  *
3502  *****************************************************************************/
3503 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3504 {
3505     const struct d3dfmt_convertor_desc *conv;
3506     WINED3DLOCKED_RECT lock_src, lock_dst;
3507     struct wined3d_surface *ret = NULL;
3508     HRESULT hr;
3509
3510     conv = find_convertor(source->resource.format->id, to_fmt);
3511     if (!conv)
3512     {
3513         FIXME("Cannot find a conversion function from format %s to %s.\n",
3514                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3515         return NULL;
3516     }
3517
3518     wined3d_surface_create(source->resource.device, source->resource.width,
3519             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3520             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3521             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3522     if (!ret)
3523     {
3524         ERR("Failed to create a destination surface for conversion.\n");
3525         return NULL;
3526     }
3527
3528     memset(&lock_src, 0, sizeof(lock_src));
3529     memset(&lock_dst, 0, sizeof(lock_dst));
3530
3531     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3532     if (FAILED(hr))
3533     {
3534         ERR("Failed to lock the source surface.\n");
3535         wined3d_surface_decref(ret);
3536         return NULL;
3537     }
3538     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3539     if (FAILED(hr))
3540     {
3541         ERR("Failed to lock the destination surface.\n");
3542         wined3d_surface_unmap(source);
3543         wined3d_surface_decref(ret);
3544         return NULL;
3545     }
3546
3547     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3548             source->resource.width, source->resource.height);
3549
3550     wined3d_surface_unmap(ret);
3551     wined3d_surface_unmap(source);
3552
3553     return ret;
3554 }
3555
3556 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3557         unsigned int bpp, UINT pitch, DWORD color)
3558 {
3559     BYTE *first;
3560     int x, y;
3561
3562     /* Do first row */
3563
3564 #define COLORFILL_ROW(type) \
3565 do { \
3566     type *d = (type *)buf; \
3567     for (x = 0; x < width; ++x) \
3568         d[x] = (type)color; \
3569 } while(0)
3570
3571     switch (bpp)
3572     {
3573         case 1:
3574             COLORFILL_ROW(BYTE);
3575             break;
3576
3577         case 2:
3578             COLORFILL_ROW(WORD);
3579             break;
3580
3581         case 3:
3582         {
3583             BYTE *d = buf;
3584             for (x = 0; x < width; ++x, d += 3)
3585             {
3586                 d[0] = (color      ) & 0xFF;
3587                 d[1] = (color >>  8) & 0xFF;
3588                 d[2] = (color >> 16) & 0xFF;
3589             }
3590             break;
3591         }
3592         case 4:
3593             COLORFILL_ROW(DWORD);
3594             break;
3595
3596         default:
3597             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3598             return WINED3DERR_NOTAVAILABLE;
3599     }
3600
3601 #undef COLORFILL_ROW
3602
3603     /* Now copy first row. */
3604     first = buf;
3605     for (y = 1; y < height; ++y)
3606     {
3607         buf += pitch;
3608         memcpy(buf, first, width * bpp);
3609     }
3610
3611     return WINED3D_OK;
3612 }
3613
3614 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3615 {
3616     TRACE("surface %p.\n", surface);
3617
3618     if (!(surface->flags & SFLAG_LOCKED))
3619     {
3620         WARN("Trying to unmap unmapped surface.\n");
3621         return WINEDDERR_NOTLOCKED;
3622     }
3623     surface->flags &= ~SFLAG_LOCKED;
3624
3625     surface->surface_ops->surface_unmap(surface);
3626
3627     return WINED3D_OK;
3628 }
3629
3630 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3631         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3632 {
3633     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3634             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3635
3636     if (surface->flags & SFLAG_LOCKED)
3637     {
3638         WARN("Surface is already mapped.\n");
3639         return WINED3DERR_INVALIDCALL;
3640     }
3641     surface->flags |= SFLAG_LOCKED;
3642
3643     if (!(surface->flags & SFLAG_LOCKABLE))
3644         WARN("Trying to lock unlockable surface.\n");
3645
3646     surface->surface_ops->surface_map(surface, rect, flags);
3647
3648     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3649
3650     if (!rect)
3651     {
3652         locked_rect->pBits = surface->resource.allocatedMemory;
3653         surface->lockedRect.left = 0;
3654         surface->lockedRect.top = 0;
3655         surface->lockedRect.right = surface->resource.width;
3656         surface->lockedRect.bottom = surface->resource.height;
3657     }
3658     else
3659     {
3660         const struct wined3d_format *format = surface->resource.format;
3661
3662         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3663         {
3664             /* Compressed textures are block based, so calculate the offset of
3665              * the block that contains the top-left pixel of the locked rectangle. */
3666             locked_rect->pBits = surface->resource.allocatedMemory
3667                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3668                     + ((rect->left / format->block_width) * format->block_byte_count);
3669         }
3670         else
3671         {
3672             locked_rect->pBits = surface->resource.allocatedMemory
3673                     + (locked_rect->Pitch * rect->top)
3674                     + (rect->left * format->byte_count);
3675         }
3676         surface->lockedRect.left = rect->left;
3677         surface->lockedRect.top = rect->top;
3678         surface->lockedRect.right = rect->right;
3679         surface->lockedRect.bottom = rect->bottom;
3680     }
3681
3682     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3683     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3684
3685     return WINED3D_OK;
3686 }
3687
3688 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3689 {
3690     HRESULT hr;
3691
3692     TRACE("surface %p, dc %p.\n", surface, dc);
3693
3694     if (surface->flags & SFLAG_USERPTR)
3695     {
3696         ERR("Not supported on surfaces with application-provided memory.\n");
3697         return WINEDDERR_NODC;
3698     }
3699
3700     /* Give more detailed info for ddraw. */
3701     if (surface->flags & SFLAG_DCINUSE)
3702         return WINEDDERR_DCALREADYCREATED;
3703
3704     /* Can't GetDC if the surface is locked. */
3705     if (surface->flags & SFLAG_LOCKED)
3706         return WINED3DERR_INVALIDCALL;
3707
3708     hr = surface->surface_ops->surface_getdc(surface);
3709     if (FAILED(hr))
3710         return hr;
3711
3712     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3713             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3714     {
3715         /* GetDC on palettized formats is unsupported in D3D9, and the method
3716          * is missing in D3D8, so this should only be used for DX <=7
3717          * surfaces (with non-device palettes). */
3718         const PALETTEENTRY *pal = NULL;
3719
3720         if (surface->palette)
3721         {
3722             pal = surface->palette->palents;
3723         }
3724         else
3725         {
3726             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3727             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3728
3729             if (dds_primary && dds_primary->palette)
3730                 pal = dds_primary->palette->palents;
3731         }
3732
3733         if (pal)
3734         {
3735             RGBQUAD col[256];
3736             unsigned int i;
3737
3738             for (i = 0; i < 256; ++i)
3739             {
3740                 col[i].rgbRed = pal[i].peRed;
3741                 col[i].rgbGreen = pal[i].peGreen;
3742                 col[i].rgbBlue = pal[i].peBlue;
3743                 col[i].rgbReserved = 0;
3744             }
3745             SetDIBColorTable(surface->hDC, 0, 256, col);
3746         }
3747     }
3748
3749     surface->flags |= SFLAG_DCINUSE;
3750
3751     *dc = surface->hDC;
3752     TRACE("Returning dc %p.\n", *dc);
3753
3754     return WINED3D_OK;
3755 }
3756
3757 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3758 {
3759     TRACE("surface %p, dc %p.\n", surface, dc);
3760
3761     if (!(surface->flags & SFLAG_DCINUSE))
3762         return WINEDDERR_NODC;
3763
3764     if (surface->hDC != dc)
3765     {
3766         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3767                 dc, surface->hDC);
3768         return WINEDDERR_NODC;
3769     }
3770
3771     /* Copy the contents of the DIB over to the PBO. */
3772     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3773         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3774
3775     /* We locked first, so unlock now. */
3776     wined3d_surface_unmap(surface);
3777
3778     surface->flags &= ~SFLAG_DCINUSE;
3779
3780     return WINED3D_OK;
3781 }
3782
3783 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3784 {
3785     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3786
3787     if (flags)
3788     {
3789         static UINT once;
3790         if (!once++)
3791             FIXME("Ignoring flags %#x.\n", flags);
3792         else
3793             WARN("Ignoring flags %#x.\n", flags);
3794     }
3795
3796     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3797     {
3798         ERR("Not supported on swapchain surfaces.\n");
3799         return WINEDDERR_NOTFLIPPABLE;
3800     }
3801
3802     /* Flipping is only supported on render targets and overlays. */
3803     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3804     {
3805         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3806         return WINEDDERR_NOTFLIPPABLE;
3807     }
3808
3809     flip_surface(surface, override);
3810
3811     /* Update overlays if they're visible. */
3812     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3813         return surface->surface_ops->surface_draw_overlay(surface);
3814
3815     return WINED3D_OK;
3816 }
3817
3818 /* Do not call while under the GL lock. */
3819 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3820 {
3821     struct wined3d_device *device = surface->resource.device;
3822
3823     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3824
3825     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3826     {
3827         struct wined3d_texture *texture = surface->container.u.texture;
3828
3829         TRACE("Passing to container (%p).\n", texture);
3830         texture->texture_ops->texture_preload(texture, srgb);
3831     }
3832     else
3833     {
3834         struct wined3d_context *context;
3835
3836         TRACE("(%p) : About to load surface\n", surface);
3837
3838         /* TODO: Use already acquired context when possible. */
3839         context = context_acquire(device, NULL);
3840
3841         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3842                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3843         {
3844             if (palette9_changed(surface))
3845             {
3846                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3847                 /* TODO: This is not necessarily needed with hw palettized texture support */
3848                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3849                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3850                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3851             }
3852         }
3853
3854         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3855
3856         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3857         {
3858             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3859             GLclampf tmp;
3860             tmp = 0.9f;
3861             ENTER_GL();
3862             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3863             LEAVE_GL();
3864         }
3865
3866         context_release(context);
3867     }
3868 }
3869
3870 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3871 {
3872     if (!surface->resource.allocatedMemory)
3873     {
3874         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3875                 surface->resource.size + RESOURCE_ALIGNMENT);
3876         if (!surface->resource.heapMemory)
3877         {
3878             ERR("Out of memory\n");
3879             return FALSE;
3880         }
3881         surface->resource.allocatedMemory =
3882             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3883     }
3884     else
3885     {
3886         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3887     }
3888
3889     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3890
3891     return TRUE;
3892 }
3893
3894 /* Read the framebuffer back into the surface */
3895 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3896 {
3897     struct wined3d_device *device = surface->resource.device;
3898     const struct wined3d_gl_info *gl_info;
3899     struct wined3d_context *context;
3900     BYTE *mem;
3901     GLint fmt;
3902     GLint type;
3903     BYTE *row, *top, *bottom;
3904     int i;
3905     BOOL bpp;
3906     RECT local_rect;
3907     BOOL srcIsUpsideDown;
3908     GLint rowLen = 0;
3909     GLint skipPix = 0;
3910     GLint skipRow = 0;
3911
3912     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3913         static BOOL warned = FALSE;
3914         if(!warned) {
3915             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3916             warned = TRUE;
3917         }
3918         return;
3919     }
3920
3921     context = context_acquire(device, surface);
3922     context_apply_blit_state(context, device);
3923     gl_info = context->gl_info;
3924
3925     ENTER_GL();
3926
3927     /* Select the correct read buffer, and give some debug output.
3928      * There is no need to keep track of the current read buffer or reset it, every part of the code
3929      * that reads sets the read buffer as desired.
3930      */
3931     if (surface_is_offscreen(surface))
3932     {
3933         /* Mapping the primary render target which is not on a swapchain.
3934          * Read from the back buffer. */
3935         TRACE("Mapping offscreen render target.\n");
3936         glReadBuffer(device->offscreenBuffer);
3937         srcIsUpsideDown = TRUE;
3938     }
3939     else
3940     {
3941         /* Onscreen surfaces are always part of a swapchain */
3942         GLenum buffer = surface_get_gl_buffer(surface);
3943         TRACE("Mapping %#x buffer.\n", buffer);
3944         glReadBuffer(buffer);
3945         checkGLcall("glReadBuffer");
3946         srcIsUpsideDown = FALSE;
3947     }
3948
3949     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3950     if (!rect)
3951     {
3952         local_rect.left = 0;
3953         local_rect.top = 0;
3954         local_rect.right = surface->resource.width;
3955         local_rect.bottom = surface->resource.height;
3956     }
3957     else
3958     {
3959         local_rect = *rect;
3960     }
3961     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3962
3963     switch (surface->resource.format->id)
3964     {
3965         case WINED3DFMT_P8_UINT:
3966         {
3967             if (primary_render_target_is_p8(device))
3968             {
3969                 /* In case of P8 render targets the index is stored in the alpha component */
3970                 fmt = GL_ALPHA;
3971                 type = GL_UNSIGNED_BYTE;
3972                 mem = dest;
3973                 bpp = surface->resource.format->byte_count;
3974             }
3975             else
3976             {
3977                 /* GL can't return palettized data, so read ARGB pixels into a
3978                  * separate block of memory and convert them into palettized format
3979                  * in software. Slow, but if the app means to use palettized render
3980                  * targets and locks it...
3981                  *
3982                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
3983                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
3984                  * for the color channels when palettizing the colors.
3985                  */
3986                 fmt = GL_RGB;
3987                 type = GL_UNSIGNED_BYTE;
3988                 pitch *= 3;
3989                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
3990                 if (!mem)
3991                 {
3992                     ERR("Out of memory\n");
3993                     LEAVE_GL();
3994                     return;
3995                 }
3996                 bpp = surface->resource.format->byte_count * 3;
3997             }
3998         }
3999         break;
4000
4001         default:
4002             mem = dest;
4003             fmt = surface->resource.format->glFormat;
4004             type = surface->resource.format->glType;
4005             bpp = surface->resource.format->byte_count;
4006     }
4007
4008     if (surface->flags & SFLAG_PBO)
4009     {
4010         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4011         checkGLcall("glBindBufferARB");
4012         if (mem)
4013         {
4014             ERR("mem not null for pbo -- unexpected\n");
4015             mem = NULL;
4016         }
4017     }
4018
4019     /* Save old pixel store pack state */
4020     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4021     checkGLcall("glGetIntegerv");
4022     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4023     checkGLcall("glGetIntegerv");
4024     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4025     checkGLcall("glGetIntegerv");
4026
4027     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4028     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4029     checkGLcall("glPixelStorei");
4030     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4031     checkGLcall("glPixelStorei");
4032     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4033     checkGLcall("glPixelStorei");
4034
4035     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4036             local_rect.right - local_rect.left,
4037             local_rect.bottom - local_rect.top,
4038             fmt, type, mem);
4039     checkGLcall("glReadPixels");
4040
4041     /* Reset previous pixel store pack state */
4042     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4043     checkGLcall("glPixelStorei");
4044     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4045     checkGLcall("glPixelStorei");
4046     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4047     checkGLcall("glPixelStorei");
4048
4049     if (surface->flags & SFLAG_PBO)
4050     {
4051         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4052         checkGLcall("glBindBufferARB");
4053
4054         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4055          * to get a pointer to it and perform the flipping in software. This is a lot
4056          * faster than calling glReadPixels for each line. In case we want more speed
4057          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4058         if (!srcIsUpsideDown)
4059         {
4060             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4061             checkGLcall("glBindBufferARB");
4062
4063             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4064             checkGLcall("glMapBufferARB");
4065         }
4066     }
4067
4068     /* TODO: Merge this with the palettization loop below for P8 targets */
4069     if(!srcIsUpsideDown) {
4070         UINT len, off;
4071         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4072             Flip the lines in software */
4073         len = (local_rect.right - local_rect.left) * bpp;
4074         off = local_rect.left * bpp;
4075
4076         row = HeapAlloc(GetProcessHeap(), 0, len);
4077         if(!row) {
4078             ERR("Out of memory\n");
4079             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4080                 HeapFree(GetProcessHeap(), 0, mem);
4081             LEAVE_GL();
4082             return;
4083         }
4084
4085         top = mem + pitch * local_rect.top;
4086         bottom = mem + pitch * (local_rect.bottom - 1);
4087         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4088             memcpy(row, top + off, len);
4089             memcpy(top + off, bottom + off, len);
4090             memcpy(bottom + off, row, len);
4091             top += pitch;
4092             bottom -= pitch;
4093         }
4094         HeapFree(GetProcessHeap(), 0, row);
4095
4096         /* Unmap the temp PBO buffer */
4097         if (surface->flags & SFLAG_PBO)
4098         {
4099             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4100             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4101         }
4102     }
4103
4104     LEAVE_GL();
4105     context_release(context);
4106
4107     /* For P8 textures we need to perform an inverse palette lookup. This is
4108      * done by searching for a palette index which matches the RGB value.
4109      * Note this isn't guaranteed to work when there are multiple entries for
4110      * the same color but we have no choice. In case of P8 render targets,
4111      * the index is stored in the alpha component so no conversion is needed. */
4112     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4113     {
4114         const PALETTEENTRY *pal = NULL;
4115         DWORD width = pitch / 3;
4116         int x, y, c;
4117
4118         if (surface->palette)
4119         {
4120             pal = surface->palette->palents;
4121         }
4122         else
4123         {
4124             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4125             HeapFree(GetProcessHeap(), 0, mem);
4126             return;
4127         }
4128
4129         for(y = local_rect.top; y < local_rect.bottom; y++) {
4130             for(x = local_rect.left; x < local_rect.right; x++) {
4131                 /*                      start              lines            pixels      */
4132                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4133                 const BYTE *green = blue  + 1;
4134                 const BYTE *red = green + 1;
4135
4136                 for(c = 0; c < 256; c++) {
4137                     if(*red   == pal[c].peRed   &&
4138                        *green == pal[c].peGreen &&
4139                        *blue  == pal[c].peBlue)
4140                     {
4141                         *((BYTE *) dest + y * width + x) = c;
4142                         break;
4143                     }
4144                 }
4145             }
4146         }
4147         HeapFree(GetProcessHeap(), 0, mem);
4148     }
4149 }
4150
4151 /* Read the framebuffer contents into a texture */
4152 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
4153 {
4154     struct wined3d_device *device = surface->resource.device;
4155     struct wined3d_context *context;
4156
4157     if (!surface_is_offscreen(surface))
4158     {
4159         /* We would need to flip onscreen surfaces, but there's no efficient
4160          * way to do that here. It makes more sense for the caller to
4161          * explicitly go through sysmem. */
4162         ERR("Not supported for onscreen targets.\n");
4163         return;
4164     }
4165
4166     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4167      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4168      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4169      */
4170     context = context_acquire(device, surface);
4171     device_invalidate_state(device, STATE_FRAMEBUFFER);
4172
4173     surface_prepare_texture(surface, context, srgb);
4174     surface_bind_and_dirtify(surface, context, srgb);
4175
4176     TRACE("Reading back offscreen render target %p.\n", surface);
4177
4178     ENTER_GL();
4179
4180     glReadBuffer(device->offscreenBuffer);
4181     checkGLcall("glReadBuffer");
4182
4183     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4184             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4185     checkGLcall("glCopyTexSubImage2D");
4186
4187     LEAVE_GL();
4188
4189     context_release(context);
4190 }
4191
4192 /* Context activation is done by the caller. */
4193 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4194         struct wined3d_context *context, BOOL srgb)
4195 {
4196     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4197     CONVERT_TYPES convert;
4198     struct wined3d_format format;
4199
4200     if (surface->flags & alloc_flag) return;
4201
4202     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4203     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4204     else surface->flags &= ~SFLAG_CONVERTED;
4205
4206     surface_bind_and_dirtify(surface, context, srgb);
4207     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4208     surface->flags |= alloc_flag;
4209 }
4210
4211 /* Context activation is done by the caller. */
4212 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4213 {
4214     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4215     {
4216         struct wined3d_texture *texture = surface->container.u.texture;
4217         UINT sub_count = texture->level_count * texture->layer_count;
4218         UINT i;
4219
4220         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4221
4222         for (i = 0; i < sub_count; ++i)
4223         {
4224             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4225             surface_prepare_texture_internal(s, context, srgb);
4226         }
4227
4228         return;
4229     }
4230
4231     surface_prepare_texture_internal(surface, context, srgb);
4232 }
4233
4234 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4235 {
4236     if (multisample)
4237     {
4238         if (surface->rb_multisample)
4239             return;
4240
4241         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4242         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4243         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4244                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4245         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4246     }
4247     else
4248     {
4249         if (surface->rb_resolved)
4250             return;
4251
4252         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4253         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4254         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4255                 surface->pow2Width, surface->pow2Height);
4256         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4257     }
4258 }
4259
4260 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4261         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4262 {
4263     struct wined3d_device *device = surface->resource.device;
4264     UINT pitch = wined3d_surface_get_pitch(surface);
4265     const struct wined3d_gl_info *gl_info;
4266     struct wined3d_context *context;
4267     RECT local_rect;
4268     UINT w, h;
4269
4270     surface_get_rect(surface, rect, &local_rect);
4271
4272     mem += local_rect.top * pitch + local_rect.left * bpp;
4273     w = local_rect.right - local_rect.left;
4274     h = local_rect.bottom - local_rect.top;
4275
4276     /* Activate the correct context for the render target */
4277     context = context_acquire(device, surface);
4278     context_apply_blit_state(context, device);
4279     gl_info = context->gl_info;
4280
4281     ENTER_GL();
4282
4283     if (!surface_is_offscreen(surface))
4284     {
4285         GLenum buffer = surface_get_gl_buffer(surface);
4286         TRACE("Unlocking %#x buffer.\n", buffer);
4287         context_set_draw_buffer(context, buffer);
4288
4289         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4290         glPixelZoom(1.0f, -1.0f);
4291     }
4292     else
4293     {
4294         /* Primary offscreen render target */
4295         TRACE("Offscreen render target.\n");
4296         context_set_draw_buffer(context, device->offscreenBuffer);
4297
4298         glPixelZoom(1.0f, 1.0f);
4299     }
4300
4301     glRasterPos3i(local_rect.left, local_rect.top, 1);
4302     checkGLcall("glRasterPos3i");
4303
4304     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4305     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4306
4307     if (surface->flags & SFLAG_PBO)
4308     {
4309         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4310         checkGLcall("glBindBufferARB");
4311     }
4312
4313     glDrawPixels(w, h, fmt, type, mem);
4314     checkGLcall("glDrawPixels");
4315
4316     if (surface->flags & SFLAG_PBO)
4317     {
4318         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4319         checkGLcall("glBindBufferARB");
4320     }
4321
4322     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4323     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4324
4325     LEAVE_GL();
4326
4327     if (wined3d_settings.strict_draw_ordering
4328             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4329             && surface->container.u.swapchain->front_buffer == surface))
4330         wglFlush();
4331
4332     context_release(context);
4333 }
4334
4335 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4336         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4337 {
4338     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4339     const struct wined3d_device *device = surface->resource.device;
4340     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4341     BOOL blit_supported = FALSE;
4342
4343     /* Copy the default values from the surface. Below we might perform fixups */
4344     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4345     *format = *surface->resource.format;
4346     *convert = NO_CONVERSION;
4347
4348     /* Ok, now look if we have to do any conversion */
4349     switch (surface->resource.format->id)
4350     {
4351         case WINED3DFMT_P8_UINT:
4352             /* Below the call to blit_supported is disabled for Wine 1.2
4353              * because the function isn't operating correctly yet. At the
4354              * moment 8-bit blits are handled in software and if certain GL
4355              * extensions are around, surface conversion is performed at
4356              * upload time. The blit_supported call recognizes it as a
4357              * destination fixup. This type of upload 'fixup' and 8-bit to
4358              * 8-bit blits need to be handled by the blit_shader.
4359              * TODO: get rid of this #if 0. */
4360 #if 0
4361             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4362                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4363                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4364 #endif
4365             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4366
4367             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4368              * texturing. Further also use conversion in case of color keying.
4369              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4370              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4371              * conflicts with this.
4372              */
4373             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4374                     || colorkey_active || !use_texturing)
4375             {
4376                 format->glFormat = GL_RGBA;
4377                 format->glInternal = GL_RGBA;
4378                 format->glType = GL_UNSIGNED_BYTE;
4379                 format->conv_byte_count = 4;
4380                 if (colorkey_active)
4381                     *convert = CONVERT_PALETTED_CK;
4382                 else
4383                     *convert = CONVERT_PALETTED;
4384             }
4385             break;
4386
4387         case WINED3DFMT_B2G3R3_UNORM:
4388             /* **********************
4389                 GL_UNSIGNED_BYTE_3_3_2
4390                 ********************** */
4391             if (colorkey_active) {
4392                 /* This texture format will never be used.. So do not care about color keying
4393                     up until the point in time it will be needed :-) */
4394                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4395             }
4396             break;
4397
4398         case WINED3DFMT_B5G6R5_UNORM:
4399             if (colorkey_active)
4400             {
4401                 *convert = CONVERT_CK_565;
4402                 format->glFormat = GL_RGBA;
4403                 format->glInternal = GL_RGB5_A1;
4404                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4405                 format->conv_byte_count = 2;
4406             }
4407             break;
4408
4409         case WINED3DFMT_B5G5R5X1_UNORM:
4410             if (colorkey_active)
4411             {
4412                 *convert = CONVERT_CK_5551;
4413                 format->glFormat = GL_BGRA;
4414                 format->glInternal = GL_RGB5_A1;
4415                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4416                 format->conv_byte_count = 2;
4417             }
4418             break;
4419
4420         case WINED3DFMT_B8G8R8_UNORM:
4421             if (colorkey_active)
4422             {
4423                 *convert = CONVERT_CK_RGB24;
4424                 format->glFormat = GL_RGBA;
4425                 format->glInternal = GL_RGBA8;
4426                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4427                 format->conv_byte_count = 4;
4428             }
4429             break;
4430
4431         case WINED3DFMT_B8G8R8X8_UNORM:
4432             if (colorkey_active)
4433             {
4434                 *convert = CONVERT_RGB32_888;
4435                 format->glFormat = GL_RGBA;
4436                 format->glInternal = GL_RGBA8;
4437                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4438                 format->conv_byte_count = 4;
4439             }
4440             break;
4441
4442         default:
4443             break;
4444     }
4445
4446     return WINED3D_OK;
4447 }
4448
4449 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4450 {
4451     const struct wined3d_device *device = surface->resource.device;
4452     const struct wined3d_palette *pal = surface->palette;
4453     BOOL index_in_alpha = FALSE;
4454     unsigned int i;
4455
4456     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4457      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4458      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4459      * duplicate entries. Store the color key in the unused alpha component to speed the
4460      * download up and to make conversion unneeded. */
4461     index_in_alpha = primary_render_target_is_p8(device);
4462
4463     if (!pal)
4464     {
4465         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4466         if (device->wined3d->flags & WINED3D_PALETTE_PER_SURFACE)
4467         {
4468             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4469             if (index_in_alpha)
4470             {
4471                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4472                  * there's no palette at this time. */
4473                 for (i = 0; i < 256; i++) table[i][3] = i;
4474             }
4475         }
4476         else
4477         {
4478             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4479              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4480              * capability flag is present (wine does advertise this capability) */
4481             for (i = 0; i < 256; ++i)
4482             {
4483                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4484                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4485                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4486                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4487             }
4488         }
4489     }
4490     else
4491     {
4492         TRACE("Using surface palette %p\n", pal);
4493         /* Get the surface's palette */
4494         for (i = 0; i < 256; ++i)
4495         {
4496             table[i][0] = pal->palents[i].peRed;
4497             table[i][1] = pal->palents[i].peGreen;
4498             table[i][2] = pal->palents[i].peBlue;
4499
4500             /* When index_in_alpha is set the palette index is stored in the
4501              * alpha component. In case of a readback we can then read
4502              * GL_ALPHA. Color keying is handled in BltOverride using a
4503              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4504              * color key itself is passed to glAlphaFunc in other cases the
4505              * alpha component of pixels that should be masked away is set to 0. */
4506             if (index_in_alpha)
4507             {
4508                 table[i][3] = i;
4509             }
4510             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4511                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4512             {
4513                 table[i][3] = 0x00;
4514             }
4515             else if (pal->flags & WINEDDPCAPS_ALPHA)
4516             {
4517                 table[i][3] = pal->palents[i].peFlags;
4518             }
4519             else
4520             {
4521                 table[i][3] = 0xFF;
4522             }
4523         }
4524     }
4525 }
4526
4527 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4528         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4529 {
4530     const BYTE *source;
4531     BYTE *dest;
4532     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4533
4534     switch (convert) {
4535         case NO_CONVERSION:
4536         {
4537             memcpy(dst, src, pitch * height);
4538             break;
4539         }
4540         case CONVERT_PALETTED:
4541         case CONVERT_PALETTED_CK:
4542         {
4543             BYTE table[256][4];
4544             unsigned int x, y;
4545
4546             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4547
4548             for (y = 0; y < height; y++)
4549             {
4550                 source = src + pitch * y;
4551                 dest = dst + outpitch * y;
4552                 /* This is an 1 bpp format, using the width here is fine */
4553                 for (x = 0; x < width; x++) {
4554                     BYTE color = *source++;
4555                     *dest++ = table[color][0];
4556                     *dest++ = table[color][1];
4557                     *dest++ = table[color][2];
4558                     *dest++ = table[color][3];
4559                 }
4560             }
4561         }
4562         break;
4563
4564         case CONVERT_CK_565:
4565         {
4566             /* Converting the 565 format in 5551 packed to emulate color-keying.
4567
4568               Note : in all these conversion, it would be best to average the averaging
4569                       pixels to get the color of the pixel that will be color-keyed to
4570                       prevent 'color bleeding'. This will be done later on if ever it is
4571                       too visible.
4572
4573               Note2: Nvidia documents say that their driver does not support alpha + color keying
4574                      on the same surface and disables color keying in such a case
4575             */
4576             unsigned int x, y;
4577             const WORD *Source;
4578             WORD *Dest;
4579
4580             TRACE("Color keyed 565\n");
4581
4582             for (y = 0; y < height; y++) {
4583                 Source = (const WORD *)(src + y * pitch);
4584                 Dest = (WORD *) (dst + y * outpitch);
4585                 for (x = 0; x < width; x++ ) {
4586                     WORD color = *Source++;
4587                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4588                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4589                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4590                         *Dest |= 0x0001;
4591                     Dest++;
4592                 }
4593             }
4594         }
4595         break;
4596
4597         case CONVERT_CK_5551:
4598         {
4599             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4600             unsigned int x, y;
4601             const WORD *Source;
4602             WORD *Dest;
4603             TRACE("Color keyed 5551\n");
4604             for (y = 0; y < height; y++) {
4605                 Source = (const WORD *)(src + y * pitch);
4606                 Dest = (WORD *) (dst + y * outpitch);
4607                 for (x = 0; x < width; x++ ) {
4608                     WORD color = *Source++;
4609                     *Dest = color;
4610                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4611                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4612                         *Dest |= (1 << 15);
4613                     else
4614                         *Dest &= ~(1 << 15);
4615                     Dest++;
4616                 }
4617             }
4618         }
4619         break;
4620
4621         case CONVERT_CK_RGB24:
4622         {
4623             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4624             unsigned int x, y;
4625             for (y = 0; y < height; y++)
4626             {
4627                 source = src + pitch * y;
4628                 dest = dst + outpitch * y;
4629                 for (x = 0; x < width; x++) {
4630                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4631                     DWORD dstcolor = color << 8;
4632                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4633                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4634                         dstcolor |= 0xff;
4635                     *(DWORD*)dest = dstcolor;
4636                     source += 3;
4637                     dest += 4;
4638                 }
4639             }
4640         }
4641         break;
4642
4643         case CONVERT_RGB32_888:
4644         {
4645             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4646             unsigned int x, y;
4647             for (y = 0; y < height; y++)
4648             {
4649                 source = src + pitch * y;
4650                 dest = dst + outpitch * y;
4651                 for (x = 0; x < width; x++) {
4652                     DWORD color = 0xffffff & *(const DWORD*)source;
4653                     DWORD dstcolor = color << 8;
4654                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4655                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4656                         dstcolor |= 0xff;
4657                     *(DWORD*)dest = dstcolor;
4658                     source += 4;
4659                     dest += 4;
4660                 }
4661             }
4662         }
4663         break;
4664
4665         default:
4666             ERR("Unsupported conversion type %#x.\n", convert);
4667     }
4668     return WINED3D_OK;
4669 }
4670
4671 BOOL palette9_changed(struct wined3d_surface *surface)
4672 {
4673     struct wined3d_device *device = surface->resource.device;
4674
4675     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4676             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4677     {
4678         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4679          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4680          */
4681         return FALSE;
4682     }
4683
4684     if (surface->palette9)
4685     {
4686         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4687         {
4688             return FALSE;
4689         }
4690     }
4691     else
4692     {
4693         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4694     }
4695     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4696
4697     return TRUE;
4698 }
4699
4700 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4701 {
4702     /* Flip the surface contents */
4703     /* Flip the DC */
4704     {
4705         HDC tmp;
4706         tmp = front->hDC;
4707         front->hDC = back->hDC;
4708         back->hDC = tmp;
4709     }
4710
4711     /* Flip the DIBsection */
4712     {
4713         HBITMAP tmp;
4714         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4715         tmp = front->dib.DIBsection;
4716         front->dib.DIBsection = back->dib.DIBsection;
4717         back->dib.DIBsection = tmp;
4718
4719         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4720         else front->flags &= ~SFLAG_DIBSECTION;
4721         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4722         else back->flags &= ~SFLAG_DIBSECTION;
4723     }
4724
4725     /* Flip the surface data */
4726     {
4727         void* tmp;
4728
4729         tmp = front->dib.bitmap_data;
4730         front->dib.bitmap_data = back->dib.bitmap_data;
4731         back->dib.bitmap_data = tmp;
4732
4733         tmp = front->resource.allocatedMemory;
4734         front->resource.allocatedMemory = back->resource.allocatedMemory;
4735         back->resource.allocatedMemory = tmp;
4736
4737         tmp = front->resource.heapMemory;
4738         front->resource.heapMemory = back->resource.heapMemory;
4739         back->resource.heapMemory = tmp;
4740     }
4741
4742     /* Flip the PBO */
4743     {
4744         GLuint tmp_pbo = front->pbo;
4745         front->pbo = back->pbo;
4746         back->pbo = tmp_pbo;
4747     }
4748
4749     /* client_memory should not be different, but just in case */
4750     {
4751         BOOL tmp;
4752         tmp = front->dib.client_memory;
4753         front->dib.client_memory = back->dib.client_memory;
4754         back->dib.client_memory = tmp;
4755     }
4756
4757     /* Flip the opengl texture */
4758     {
4759         GLuint tmp;
4760
4761         tmp = back->texture_name;
4762         back->texture_name = front->texture_name;
4763         front->texture_name = tmp;
4764
4765         tmp = back->texture_name_srgb;
4766         back->texture_name_srgb = front->texture_name_srgb;
4767         front->texture_name_srgb = tmp;
4768
4769         tmp = back->rb_multisample;
4770         back->rb_multisample = front->rb_multisample;
4771         front->rb_multisample = tmp;
4772
4773         tmp = back->rb_resolved;
4774         back->rb_resolved = front->rb_resolved;
4775         front->rb_resolved = tmp;
4776
4777         resource_unload(&back->resource);
4778         resource_unload(&front->resource);
4779     }
4780
4781     {
4782         DWORD tmp_flags = back->flags;
4783         back->flags = front->flags;
4784         front->flags = tmp_flags;
4785     }
4786 }
4787
4788 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4789  * pixel copy calls. */
4790 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4791         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4792 {
4793     struct wined3d_device *device = dst_surface->resource.device;
4794     float xrel, yrel;
4795     UINT row;
4796     struct wined3d_context *context;
4797     BOOL upsidedown = FALSE;
4798     RECT dst_rect = *dst_rect_in;
4799
4800     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4801      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4802      */
4803     if(dst_rect.top > dst_rect.bottom) {
4804         UINT tmp = dst_rect.bottom;
4805         dst_rect.bottom = dst_rect.top;
4806         dst_rect.top = tmp;
4807         upsidedown = TRUE;
4808     }
4809
4810     context = context_acquire(device, src_surface);
4811     context_apply_blit_state(context, device);
4812     surface_internal_preload(dst_surface, SRGB_RGB);
4813     ENTER_GL();
4814
4815     /* Bind the target texture */
4816     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4817     if (surface_is_offscreen(src_surface))
4818     {
4819         TRACE("Reading from an offscreen target\n");
4820         upsidedown = !upsidedown;
4821         glReadBuffer(device->offscreenBuffer);
4822     }
4823     else
4824     {
4825         glReadBuffer(surface_get_gl_buffer(src_surface));
4826     }
4827     checkGLcall("glReadBuffer");
4828
4829     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4830     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4831
4832     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4833     {
4834         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4835
4836         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4837             ERR("Texture filtering not supported in direct blit\n");
4838         }
4839     }
4840     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4841             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4842     {
4843         ERR("Texture filtering not supported in direct blit\n");
4844     }
4845
4846     if (upsidedown
4847             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4848             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4849     {
4850         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4851
4852         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4853                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4854                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4855                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4856     }
4857     else
4858     {
4859         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4860         /* I have to process this row by row to swap the image,
4861          * otherwise it would be upside down, so stretching in y direction
4862          * doesn't cost extra time
4863          *
4864          * However, stretching in x direction can be avoided if not necessary
4865          */
4866         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4867             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4868             {
4869                 /* Well, that stuff works, but it's very slow.
4870                  * find a better way instead
4871                  */
4872                 UINT col;
4873
4874                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4875                 {
4876                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4877                             dst_rect.left + col /* x offset */, row /* y offset */,
4878                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4879                 }
4880             }
4881             else
4882             {
4883                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4884                         dst_rect.left /* x offset */, row /* y offset */,
4885                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4886             }
4887         }
4888     }
4889     checkGLcall("glCopyTexSubImage2D");
4890
4891     LEAVE_GL();
4892     context_release(context);
4893
4894     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4895      * path is never entered
4896      */
4897     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4898 }
4899
4900 /* Uses the hardware to stretch and flip the image */
4901 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4902         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4903 {
4904     struct wined3d_device *device = dst_surface->resource.device;
4905     struct wined3d_swapchain *src_swapchain = NULL;
4906     GLuint src, backup = 0;
4907     float left, right, top, bottom; /* Texture coordinates */
4908     UINT fbwidth = src_surface->resource.width;
4909     UINT fbheight = src_surface->resource.height;
4910     struct wined3d_context *context;
4911     GLenum drawBuffer = GL_BACK;
4912     GLenum texture_target;
4913     BOOL noBackBufferBackup;
4914     BOOL src_offscreen;
4915     BOOL upsidedown = FALSE;
4916     RECT dst_rect = *dst_rect_in;
4917
4918     TRACE("Using hwstretch blit\n");
4919     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4920     context = context_acquire(device, src_surface);
4921     context_apply_blit_state(context, device);
4922     surface_internal_preload(dst_surface, SRGB_RGB);
4923
4924     src_offscreen = surface_is_offscreen(src_surface);
4925     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4926     if (!noBackBufferBackup && !src_surface->texture_name)
4927     {
4928         /* Get it a description */
4929         surface_internal_preload(src_surface, SRGB_RGB);
4930     }
4931     ENTER_GL();
4932
4933     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4934      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4935      */
4936     if (context->aux_buffers >= 2)
4937     {
4938         /* Got more than one aux buffer? Use the 2nd aux buffer */
4939         drawBuffer = GL_AUX1;
4940     }
4941     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4942     {
4943         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4944         drawBuffer = GL_AUX0;
4945     }
4946
4947     if(noBackBufferBackup) {
4948         glGenTextures(1, &backup);
4949         checkGLcall("glGenTextures");
4950         context_bind_texture(context, GL_TEXTURE_2D, backup);
4951         texture_target = GL_TEXTURE_2D;
4952     } else {
4953         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4954          * we are reading from the back buffer, the backup can be used as source texture
4955          */
4956         texture_target = src_surface->texture_target;
4957         context_bind_texture(context, texture_target, src_surface->texture_name);
4958         glEnable(texture_target);
4959         checkGLcall("glEnable(texture_target)");
4960
4961         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4962         src_surface->flags &= ~SFLAG_INTEXTURE;
4963     }
4964
4965     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4966      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4967      */
4968     if(dst_rect.top > dst_rect.bottom) {
4969         UINT tmp = dst_rect.bottom;
4970         dst_rect.bottom = dst_rect.top;
4971         dst_rect.top = tmp;
4972         upsidedown = TRUE;
4973     }
4974
4975     if (src_offscreen)
4976     {
4977         TRACE("Reading from an offscreen target\n");
4978         upsidedown = !upsidedown;
4979         glReadBuffer(device->offscreenBuffer);
4980     }
4981     else
4982     {
4983         glReadBuffer(surface_get_gl_buffer(src_surface));
4984     }
4985
4986     /* TODO: Only back up the part that will be overwritten */
4987     glCopyTexSubImage2D(texture_target, 0,
4988                         0, 0 /* read offsets */,
4989                         0, 0,
4990                         fbwidth,
4991                         fbheight);
4992
4993     checkGLcall("glCopyTexSubImage2D");
4994
4995     /* No issue with overriding these - the sampler is dirty due to blit usage */
4996     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4997             wined3d_gl_mag_filter(magLookup, Filter));
4998     checkGLcall("glTexParameteri");
4999     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5000             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5001     checkGLcall("glTexParameteri");
5002
5003     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5004         src_swapchain = src_surface->container.u.swapchain;
5005     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5006     {
5007         src = backup ? backup : src_surface->texture_name;
5008     }
5009     else
5010     {
5011         glReadBuffer(GL_FRONT);
5012         checkGLcall("glReadBuffer(GL_FRONT)");
5013
5014         glGenTextures(1, &src);
5015         checkGLcall("glGenTextures(1, &src)");
5016         context_bind_texture(context, GL_TEXTURE_2D, src);
5017
5018         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5019          * out for power of 2 sizes
5020          */
5021         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5022                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5023         checkGLcall("glTexImage2D");
5024         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5025                             0, 0 /* read offsets */,
5026                             0, 0,
5027                             fbwidth,
5028                             fbheight);
5029
5030         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5031         checkGLcall("glTexParameteri");
5032         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5033         checkGLcall("glTexParameteri");
5034
5035         glReadBuffer(GL_BACK);
5036         checkGLcall("glReadBuffer(GL_BACK)");
5037
5038         if(texture_target != GL_TEXTURE_2D) {
5039             glDisable(texture_target);
5040             glEnable(GL_TEXTURE_2D);
5041             texture_target = GL_TEXTURE_2D;
5042         }
5043     }
5044     checkGLcall("glEnd and previous");
5045
5046     left = src_rect->left;
5047     right = src_rect->right;
5048
5049     if (!upsidedown)
5050     {
5051         top = src_surface->resource.height - src_rect->top;
5052         bottom = src_surface->resource.height - src_rect->bottom;
5053     }
5054     else
5055     {
5056         top = src_surface->resource.height - src_rect->bottom;
5057         bottom = src_surface->resource.height - src_rect->top;
5058     }
5059
5060     if (src_surface->flags & SFLAG_NORMCOORD)
5061     {
5062         left /= src_surface->pow2Width;
5063         right /= src_surface->pow2Width;
5064         top /= src_surface->pow2Height;
5065         bottom /= src_surface->pow2Height;
5066     }
5067
5068     /* draw the source texture stretched and upside down. The correct surface is bound already */
5069     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5070     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5071
5072     context_set_draw_buffer(context, drawBuffer);
5073     glReadBuffer(drawBuffer);
5074
5075     glBegin(GL_QUADS);
5076         /* bottom left */
5077         glTexCoord2f(left, bottom);
5078         glVertex2i(0, 0);
5079
5080         /* top left */
5081         glTexCoord2f(left, top);
5082         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5083
5084         /* top right */
5085         glTexCoord2f(right, top);
5086         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5087
5088         /* bottom right */
5089         glTexCoord2f(right, bottom);
5090         glVertex2i(dst_rect.right - dst_rect.left, 0);
5091     glEnd();
5092     checkGLcall("glEnd and previous");
5093
5094     if (texture_target != dst_surface->texture_target)
5095     {
5096         glDisable(texture_target);
5097         glEnable(dst_surface->texture_target);
5098         texture_target = dst_surface->texture_target;
5099     }
5100
5101     /* Now read the stretched and upside down image into the destination texture */
5102     context_bind_texture(context, texture_target, dst_surface->texture_name);
5103     glCopyTexSubImage2D(texture_target,
5104                         0,
5105                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5106                         0, 0, /* We blitted the image to the origin */
5107                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5108     checkGLcall("glCopyTexSubImage2D");
5109
5110     if(drawBuffer == GL_BACK) {
5111         /* Write the back buffer backup back */
5112         if(backup) {
5113             if(texture_target != GL_TEXTURE_2D) {
5114                 glDisable(texture_target);
5115                 glEnable(GL_TEXTURE_2D);
5116                 texture_target = GL_TEXTURE_2D;
5117             }
5118             context_bind_texture(context, GL_TEXTURE_2D, backup);
5119         }
5120         else
5121         {
5122             if (texture_target != src_surface->texture_target)
5123             {
5124                 glDisable(texture_target);
5125                 glEnable(src_surface->texture_target);
5126                 texture_target = src_surface->texture_target;
5127             }
5128             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5129         }
5130
5131         glBegin(GL_QUADS);
5132             /* top left */
5133             glTexCoord2f(0.0f, 0.0f);
5134             glVertex2i(0, fbheight);
5135
5136             /* bottom left */
5137             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5138             glVertex2i(0, 0);
5139
5140             /* bottom right */
5141             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5142                     (float)fbheight / (float)src_surface->pow2Height);
5143             glVertex2i(fbwidth, 0);
5144
5145             /* top right */
5146             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5147             glVertex2i(fbwidth, fbheight);
5148         glEnd();
5149     }
5150     glDisable(texture_target);
5151     checkGLcall("glDisable(texture_target)");
5152
5153     /* Cleanup */
5154     if (src != src_surface->texture_name && src != backup)
5155     {
5156         glDeleteTextures(1, &src);
5157         checkGLcall("glDeleteTextures(1, &src)");
5158     }
5159     if(backup) {
5160         glDeleteTextures(1, &backup);
5161         checkGLcall("glDeleteTextures(1, &backup)");
5162     }
5163
5164     LEAVE_GL();
5165
5166     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5167
5168     context_release(context);
5169
5170     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5171      * path is never entered
5172      */
5173     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5174 }
5175
5176 /* Front buffer coordinates are always full screen coordinates, but our GL
5177  * drawable is limited to the window's client area. The sysmem and texture
5178  * copies do have the full screen size. Note that GL has a bottom-left
5179  * origin, while D3D has a top-left origin. */
5180 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5181 {
5182     UINT drawable_height;
5183
5184     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5185             && surface == surface->container.u.swapchain->front_buffer)
5186     {
5187         POINT offset = {0, 0};
5188         RECT windowsize;
5189
5190         ScreenToClient(window, &offset);
5191         OffsetRect(rect, offset.x, offset.y);
5192
5193         GetClientRect(window, &windowsize);
5194         drawable_height = windowsize.bottom - windowsize.top;
5195     }
5196     else
5197     {
5198         drawable_height = surface->resource.height;
5199     }
5200
5201     rect->top = drawable_height - rect->top;
5202     rect->bottom = drawable_height - rect->bottom;
5203 }
5204
5205 static void surface_blt_to_drawable(struct wined3d_device *device,
5206         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5207         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5208         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5209 {
5210     struct wined3d_context *context;
5211     RECT src_rect, dst_rect;
5212
5213     src_rect = *src_rect_in;
5214     dst_rect = *dst_rect_in;
5215
5216     /* Make sure the surface is up-to-date. This should probably use
5217      * surface_load_location() and worry about the destination surface too,
5218      * unless we're overwriting it completely. */
5219     surface_internal_preload(src_surface, SRGB_RGB);
5220
5221     /* Activate the destination context, set it up for blitting */
5222     context = context_acquire(device, dst_surface);
5223     context_apply_blit_state(context, device);
5224
5225     if (!surface_is_offscreen(dst_surface))
5226         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5227
5228     device->blitter->set_shader(device->blit_priv, context, src_surface);
5229
5230     ENTER_GL();
5231
5232     if (color_key)
5233     {
5234         glEnable(GL_ALPHA_TEST);
5235         checkGLcall("glEnable(GL_ALPHA_TEST)");
5236
5237         /* When the primary render target uses P8, the alpha component
5238          * contains the palette index. Which means that the colorkey is one of
5239          * the palette entries. In other cases pixels that should be masked
5240          * away have alpha set to 0. */
5241         if (primary_render_target_is_p8(device))
5242             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5243         else
5244             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5245         checkGLcall("glAlphaFunc");
5246     }
5247     else
5248     {
5249         glDisable(GL_ALPHA_TEST);
5250         checkGLcall("glDisable(GL_ALPHA_TEST)");
5251     }
5252
5253     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5254
5255     if (color_key)
5256     {
5257         glDisable(GL_ALPHA_TEST);
5258         checkGLcall("glDisable(GL_ALPHA_TEST)");
5259     }
5260
5261     LEAVE_GL();
5262
5263     /* Leave the opengl state valid for blitting */
5264     device->blitter->unset_shader(context->gl_info);
5265
5266     if (wined3d_settings.strict_draw_ordering
5267             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5268             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5269         wglFlush(); /* Flush to ensure ordering across contexts. */
5270
5271     context_release(context);
5272 }
5273
5274 /* Do not call while under the GL lock. */
5275 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5276 {
5277     struct wined3d_device *device = s->resource.device;
5278     const struct blit_shader *blitter;
5279
5280     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5281             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5282     if (!blitter)
5283     {
5284         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5285         return WINED3DERR_INVALIDCALL;
5286     }
5287
5288     return blitter->color_fill(device, s, rect, color);
5289 }
5290
5291 /* Do not call while under the GL lock. */
5292 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5293         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5294         WINED3DTEXTUREFILTERTYPE Filter)
5295 {
5296     struct wined3d_device *device = dst_surface->resource.device;
5297     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5298     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5299
5300     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5301             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5302             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5303
5304     /* Get the swapchain. One of the surfaces has to be a primary surface */
5305     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5306     {
5307         WARN("Destination is in sysmem, rejecting gl blt\n");
5308         return WINED3DERR_INVALIDCALL;
5309     }
5310
5311     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5312         dstSwapchain = dst_surface->container.u.swapchain;
5313
5314     if (src_surface)
5315     {
5316         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5317         {
5318             WARN("Src is in sysmem, rejecting gl blt\n");
5319             return WINED3DERR_INVALIDCALL;
5320         }
5321
5322         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5323             srcSwapchain = src_surface->container.u.swapchain;
5324     }
5325
5326     /* Early sort out of cases where no render target is used */
5327     if (!dstSwapchain && !srcSwapchain
5328             && src_surface != device->fb.render_targets[0]
5329             && dst_surface != device->fb.render_targets[0])
5330     {
5331         TRACE("No surface is render target, not using hardware blit.\n");
5332         return WINED3DERR_INVALIDCALL;
5333     }
5334
5335     /* No destination color keying supported */
5336     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5337     {
5338         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5339         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5340         return WINED3DERR_INVALIDCALL;
5341     }
5342
5343     if (dstSwapchain && dstSwapchain == srcSwapchain)
5344     {
5345         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5346         return WINED3DERR_INVALIDCALL;
5347     }
5348
5349     if (dstSwapchain && srcSwapchain)
5350     {
5351         FIXME("Implement hardware blit between two different swapchains\n");
5352         return WINED3DERR_INVALIDCALL;
5353     }
5354
5355     if (dstSwapchain)
5356     {
5357         /* Handled with regular texture -> swapchain blit */
5358         if (src_surface == device->fb.render_targets[0])
5359             TRACE("Blit from active render target to a swapchain\n");
5360     }
5361     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5362     {
5363         FIXME("Implement blit from a swapchain to the active render target\n");
5364         return WINED3DERR_INVALIDCALL;
5365     }
5366
5367     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5368     {
5369         /* Blit from render target to texture */
5370         BOOL stretchx;
5371
5372         /* P8 read back is not implemented */
5373         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5374                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5375         {
5376             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5377             return WINED3DERR_INVALIDCALL;
5378         }
5379
5380         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5381         {
5382             TRACE("Color keying not supported by frame buffer to texture blit\n");
5383             return WINED3DERR_INVALIDCALL;
5384             /* Destination color key is checked above */
5385         }
5386
5387         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5388             stretchx = TRUE;
5389         else
5390             stretchx = FALSE;
5391
5392         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5393          * flip the image nor scale it.
5394          *
5395          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5396          * -> If the app wants a image width an unscaled width, copy it line per line
5397          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5398          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5399          *    back buffer. This is slower than reading line per line, thus not used for flipping
5400          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5401          *    pixel by pixel. */
5402         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5403                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5404         {
5405             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5406             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5407         } else {
5408             TRACE("Using hardware stretching to flip / stretch the texture\n");
5409             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5410         }
5411
5412         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5413         {
5414             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5415             dst_surface->resource.allocatedMemory = NULL;
5416             dst_surface->resource.heapMemory = NULL;
5417         }
5418         else
5419         {
5420             dst_surface->flags &= ~SFLAG_INSYSMEM;
5421         }
5422
5423         return WINED3D_OK;
5424     }
5425     else if (src_surface)
5426     {
5427         /* Blit from offscreen surface to render target */
5428         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5429         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5430
5431         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5432
5433         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5434                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5435                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5436         {
5437             FIXME("Unsupported blit operation falling back to software\n");
5438             return WINED3DERR_INVALIDCALL;
5439         }
5440
5441         /* Color keying: Check if we have to do a color keyed blt,
5442          * and if not check if a color key is activated.
5443          *
5444          * Just modify the color keying parameters in the surface and restore them afterwards
5445          * The surface keeps track of the color key last used to load the opengl surface.
5446          * PreLoad will catch the change to the flags and color key and reload if necessary.
5447          */
5448         if (flags & WINEDDBLT_KEYSRC)
5449         {
5450             /* Use color key from surface */
5451         }
5452         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5453         {
5454             /* Use color key from DDBltFx */
5455             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5456             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5457         }
5458         else
5459         {
5460             /* Do not use color key */
5461             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5462         }
5463
5464         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5465                 src_surface, src_rect, dst_surface, dst_rect);
5466
5467         /* Restore the color key parameters */
5468         src_surface->CKeyFlags = oldCKeyFlags;
5469         src_surface->SrcBltCKey = oldBltCKey;
5470
5471         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5472
5473         return WINED3D_OK;
5474     }
5475
5476     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5477     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5478     return WINED3DERR_INVALIDCALL;
5479 }
5480
5481 /* GL locking is done by the caller */
5482 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5483         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5484 {
5485     struct wined3d_device *device = surface->resource.device;
5486     const struct wined3d_gl_info *gl_info = context->gl_info;
5487     GLint compare_mode = GL_NONE;
5488     struct blt_info info;
5489     GLint old_binding = 0;
5490     RECT rect;
5491
5492     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5493
5494     glDisable(GL_CULL_FACE);
5495     glDisable(GL_BLEND);
5496     glDisable(GL_ALPHA_TEST);
5497     glDisable(GL_SCISSOR_TEST);
5498     glDisable(GL_STENCIL_TEST);
5499     glEnable(GL_DEPTH_TEST);
5500     glDepthFunc(GL_ALWAYS);
5501     glDepthMask(GL_TRUE);
5502     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5503     glViewport(x, y, w, h);
5504
5505     SetRect(&rect, 0, h, w, 0);
5506     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5507     context_active_texture(context, context->gl_info, 0);
5508     glGetIntegerv(info.binding, &old_binding);
5509     glBindTexture(info.bind_target, texture);
5510     if (gl_info->supported[ARB_SHADOW])
5511     {
5512         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5513         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5514     }
5515
5516     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5517             gl_info, info.tex_type, &surface->ds_current_size);
5518
5519     glBegin(GL_TRIANGLE_STRIP);
5520     glTexCoord3fv(info.coords[0]);
5521     glVertex2f(-1.0f, -1.0f);
5522     glTexCoord3fv(info.coords[1]);
5523     glVertex2f(1.0f, -1.0f);
5524     glTexCoord3fv(info.coords[2]);
5525     glVertex2f(-1.0f, 1.0f);
5526     glTexCoord3fv(info.coords[3]);
5527     glVertex2f(1.0f, 1.0f);
5528     glEnd();
5529
5530     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5531     glBindTexture(info.bind_target, old_binding);
5532
5533     glPopAttrib();
5534
5535     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5536 }
5537
5538 void surface_modify_ds_location(struct wined3d_surface *surface,
5539         DWORD location, UINT w, UINT h)
5540 {
5541     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5542
5543     if (location & ~SFLAG_DS_LOCATIONS)
5544         FIXME("Invalid location (%#x) specified.\n", location);
5545
5546     surface->ds_current_size.cx = w;
5547     surface->ds_current_size.cy = h;
5548     surface->flags &= ~SFLAG_DS_LOCATIONS;
5549     surface->flags |= location;
5550 }
5551
5552 /* Context activation is done by the caller. */
5553 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5554 {
5555     struct wined3d_device *device = surface->resource.device;
5556     GLsizei w, h;
5557
5558     TRACE("surface %p, new location %#x.\n", surface, location);
5559
5560     /* TODO: Make this work for modes other than FBO */
5561     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5562
5563     if (!(surface->flags & location))
5564     {
5565         w = surface->ds_current_size.cx;
5566         h = surface->ds_current_size.cy;
5567         surface->ds_current_size.cx = 0;
5568         surface->ds_current_size.cy = 0;
5569     }
5570     else
5571     {
5572         w = surface->resource.width;
5573         h = surface->resource.height;
5574     }
5575
5576     if (surface->ds_current_size.cx == surface->resource.width
5577             && surface->ds_current_size.cy == surface->resource.height)
5578     {
5579         TRACE("Location (%#x) is already up to date.\n", location);
5580         return;
5581     }
5582
5583     if (surface->current_renderbuffer)
5584     {
5585         FIXME("Not supported with fixed up depth stencil.\n");
5586         return;
5587     }
5588
5589     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5590     {
5591         /* This mostly happens when a depth / stencil is used without being
5592          * cleared first. In principle we could upload from sysmem, or
5593          * explicitly clear before first usage. For the moment there don't
5594          * appear to be a lot of applications depending on this, so a FIXME
5595          * should do. */
5596         FIXME("No up to date depth stencil location.\n");
5597         surface->flags |= location;
5598         surface->ds_current_size.cx = surface->resource.width;
5599         surface->ds_current_size.cy = surface->resource.height;
5600         return;
5601     }
5602
5603     if (location == SFLAG_DS_OFFSCREEN)
5604     {
5605         GLint old_binding = 0;
5606         GLenum bind_target;
5607
5608         /* The render target is allowed to be smaller than the depth/stencil
5609          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5610          * than the offscreen surface. Don't overwrite the offscreen surface
5611          * with undefined data. */
5612         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5613         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5614
5615         TRACE("Copying onscreen depth buffer to depth texture.\n");
5616
5617         ENTER_GL();
5618
5619         if (!device->depth_blt_texture)
5620         {
5621             glGenTextures(1, &device->depth_blt_texture);
5622         }
5623
5624         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5625          * directly on the FBO texture. That's because we need to flip. */
5626         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5627                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5628         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5629         {
5630             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5631             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5632         }
5633         else
5634         {
5635             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5636             bind_target = GL_TEXTURE_2D;
5637         }
5638         glBindTexture(bind_target, device->depth_blt_texture);
5639         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5640          * internal format, because the internal format might include stencil
5641          * data. In principle we should copy stencil data as well, but unless
5642          * the driver supports stencil export it's hard to do, and doesn't
5643          * seem to be needed in practice. If the hardware doesn't support
5644          * writing stencil data, the glCopyTexImage2D() call might trigger
5645          * software fallbacks. */
5646         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5647         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5648         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5649         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5650         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5651         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5652         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5653         glBindTexture(bind_target, old_binding);
5654
5655         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5656                 NULL, surface, SFLAG_INTEXTURE);
5657         context_set_draw_buffer(context, GL_NONE);
5658         glReadBuffer(GL_NONE);
5659
5660         /* Do the actual blit */
5661         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5662         checkGLcall("depth_blt");
5663
5664         context_invalidate_state(context, STATE_FRAMEBUFFER);
5665
5666         LEAVE_GL();
5667
5668         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5669     }
5670     else if (location == SFLAG_DS_ONSCREEN)
5671     {
5672         TRACE("Copying depth texture to onscreen depth buffer.\n");
5673
5674         ENTER_GL();
5675
5676         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5677                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5678         surface_depth_blt(surface, context, surface->texture_name,
5679                 0, surface->pow2Height - h, w, h, surface->texture_target);
5680         checkGLcall("depth_blt");
5681
5682         context_invalidate_state(context, STATE_FRAMEBUFFER);
5683
5684         LEAVE_GL();
5685
5686         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5687     }
5688     else
5689     {
5690         ERR("Invalid location (%#x) specified.\n", location);
5691     }
5692
5693     surface->flags |= location;
5694     surface->ds_current_size.cx = surface->resource.width;
5695     surface->ds_current_size.cy = surface->resource.height;
5696 }
5697
5698 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5699 {
5700     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5701     struct wined3d_surface *overlay;
5702
5703     TRACE("surface %p, location %s, persistent %#x.\n",
5704             surface, debug_surflocation(location), persistent);
5705
5706     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5707             && (location & SFLAG_INDRAWABLE))
5708         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5709
5710     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5711             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5712         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5713
5714     if (persistent)
5715     {
5716         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5717                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5718         {
5719             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5720             {
5721                 TRACE("Passing to container.\n");
5722                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5723             }
5724         }
5725         surface->flags &= ~SFLAG_LOCATIONS;
5726         surface->flags |= location;
5727
5728         /* Redraw emulated overlays, if any */
5729         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5730         {
5731             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5732             {
5733                 overlay->surface_ops->surface_draw_overlay(overlay);
5734             }
5735         }
5736     }
5737     else
5738     {
5739         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5740         {
5741             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5742             {
5743                 TRACE("Passing to container\n");
5744                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5745             }
5746         }
5747         surface->flags &= ~location;
5748     }
5749
5750     if (!(surface->flags & SFLAG_LOCATIONS))
5751     {
5752         ERR("Surface %p does not have any up to date location.\n", surface);
5753     }
5754 }
5755
5756 static DWORD resource_access_from_location(DWORD location)
5757 {
5758     switch (location)
5759     {
5760         case SFLAG_INSYSMEM:
5761             return WINED3D_RESOURCE_ACCESS_CPU;
5762
5763         case SFLAG_INDRAWABLE:
5764         case SFLAG_INSRGBTEX:
5765         case SFLAG_INTEXTURE:
5766         case SFLAG_INRB_MULTISAMPLE:
5767         case SFLAG_INRB_RESOLVED:
5768             return WINED3D_RESOURCE_ACCESS_GPU;
5769
5770         default:
5771             FIXME("Unhandled location %#x.\n", location);
5772             return 0;
5773     }
5774 }
5775
5776 static void surface_load_sysmem(struct wined3d_surface *surface,
5777         const struct wined3d_gl_info *gl_info, const RECT *rect)
5778 {
5779     surface_prepare_system_memory(surface);
5780
5781     /* Download the surface to system memory. */
5782     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5783     {
5784         struct wined3d_device *device = surface->resource.device;
5785         struct wined3d_context *context;
5786
5787         /* TODO: Use already acquired context when possible. */
5788         context = context_acquire(device, NULL);
5789
5790         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5791         surface_download_data(surface, gl_info);
5792
5793         context_release(context);
5794
5795         return;
5796     }
5797
5798     /* Note: It might be faster to download into a texture first. */
5799     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5800             wined3d_surface_get_pitch(surface));
5801 }
5802
5803 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5804         const struct wined3d_gl_info *gl_info, const RECT *rect)
5805 {
5806     struct wined3d_device *device = surface->resource.device;
5807     struct wined3d_format format;
5808     CONVERT_TYPES convert;
5809     UINT byte_count;
5810     BYTE *mem;
5811
5812     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5813     {
5814         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5815         return WINED3DERR_INVALIDCALL;
5816     }
5817
5818     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5819         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5820
5821     if (surface->flags & SFLAG_INTEXTURE)
5822     {
5823         RECT r;
5824
5825         surface_get_rect(surface, rect, &r);
5826         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5827
5828         return WINED3D_OK;
5829     }
5830
5831     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5832     {
5833         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5834          * path through sysmem. */
5835         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5836     }
5837
5838     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5839
5840     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5841      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5842      * called. */
5843     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5844     {
5845         struct wined3d_context *context;
5846
5847         TRACE("Removing the pbo attached to surface %p.\n", surface);
5848
5849         /* TODO: Use already acquired context when possible. */
5850         context = context_acquire(device, NULL);
5851
5852         surface_remove_pbo(surface, gl_info);
5853
5854         context_release(context);
5855     }
5856
5857     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5858     {
5859         UINT height = surface->resource.height;
5860         UINT width = surface->resource.width;
5861         UINT src_pitch, dst_pitch;
5862
5863         byte_count = format.conv_byte_count;
5864         src_pitch = wined3d_surface_get_pitch(surface);
5865
5866         /* Stick to the alignment for the converted surface too, makes it
5867          * easier to load the surface. */
5868         dst_pitch = width * byte_count;
5869         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5870
5871         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5872         {
5873             ERR("Out of memory (%u).\n", dst_pitch * height);
5874             return E_OUTOFMEMORY;
5875         }
5876
5877         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5878                 src_pitch, width, height, dst_pitch, convert, surface);
5879
5880         surface->flags |= SFLAG_CONVERTED;
5881     }
5882     else
5883     {
5884         surface->flags &= ~SFLAG_CONVERTED;
5885         mem = surface->resource.allocatedMemory;
5886         byte_count = format.byte_count;
5887     }
5888
5889     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5890
5891     /* Don't delete PBO memory. */
5892     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5893         HeapFree(GetProcessHeap(), 0, mem);
5894
5895     return WINED3D_OK;
5896 }
5897
5898 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5899         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5900 {
5901     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
5902     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5903     struct wined3d_device *device = surface->resource.device;
5904     struct wined3d_context *context;
5905     UINT width, src_pitch, dst_pitch;
5906     struct wined3d_bo_address data;
5907     struct wined3d_format format;
5908     POINT dst_point = {0, 0};
5909     CONVERT_TYPES convert;
5910     BYTE *mem;
5911
5912     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5913             && surface_is_offscreen(surface)
5914             && (surface->flags & SFLAG_INDRAWABLE))
5915     {
5916         read_from_framebuffer_texture(surface, srgb);
5917
5918         return WINED3D_OK;
5919     }
5920
5921     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5922             && (surface->resource.format->flags & attach_flags) == attach_flags
5923             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5924                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5925                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5926     {
5927         if (srgb)
5928             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5929                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5930         else
5931             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5932                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5933
5934         return WINED3D_OK;
5935     }
5936
5937     /* Upload from system memory */
5938
5939     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
5940             TRUE /* We will use textures */, &format, &convert);
5941
5942     if (srgb)
5943     {
5944         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
5945         {
5946             /* Performance warning... */
5947             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
5948             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5949         }
5950     }
5951     else
5952     {
5953         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
5954         {
5955             /* Performance warning... */
5956             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
5957             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5958         }
5959     }
5960
5961     if (!(surface->flags & SFLAG_INSYSMEM))
5962     {
5963         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
5964         /* Lets hope we get it from somewhere... */
5965         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5966     }
5967
5968     /* TODO: Use already acquired context when possible. */
5969     context = context_acquire(device, NULL);
5970
5971     surface_prepare_texture(surface, context, srgb);
5972     surface_bind_and_dirtify(surface, context, srgb);
5973
5974     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
5975     {
5976         surface->flags |= SFLAG_GLCKEY;
5977         surface->glCKey = surface->SrcBltCKey;
5978     }
5979     else surface->flags &= ~SFLAG_GLCKEY;
5980
5981     width = surface->resource.width;
5982     src_pitch = wined3d_surface_get_pitch(surface);
5983
5984     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5985      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
5986      * called. */
5987     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
5988     {
5989         TRACE("Removing the pbo attached to surface %p.\n", surface);
5990         surface_remove_pbo(surface, gl_info);
5991     }
5992
5993     if (format.convert)
5994     {
5995         /* This code is entered for texture formats which need a fixup. */
5996         UINT height = surface->resource.height;
5997
5998         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
5999         dst_pitch = width * format.conv_byte_count;
6000         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6001
6002         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6003         {
6004             ERR("Out of memory (%u).\n", dst_pitch * height);
6005             context_release(context);
6006             return E_OUTOFMEMORY;
6007         }
6008         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6009     }
6010     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6011     {
6012         /* This code is only entered for color keying fixups */
6013         UINT height = surface->resource.height;
6014
6015         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6016         dst_pitch = width * format.conv_byte_count;
6017         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6018
6019         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6020         {
6021             ERR("Out of memory (%u).\n", dst_pitch * height);
6022             context_release(context);
6023             return E_OUTOFMEMORY;
6024         }
6025         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6026                 width, height, dst_pitch, convert, surface);
6027     }
6028     else
6029     {
6030         mem = surface->resource.allocatedMemory;
6031     }
6032
6033     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6034     data.addr = mem;
6035     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6036
6037     context_release(context);
6038
6039     /* Don't delete PBO memory. */
6040     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6041         HeapFree(GetProcessHeap(), 0, mem);
6042
6043     return WINED3D_OK;
6044 }
6045
6046 static void surface_multisample_resolve(struct wined3d_surface *surface)
6047 {
6048     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6049
6050     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6051         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6052
6053     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6054             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6055 }
6056
6057 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6058 {
6059     struct wined3d_device *device = surface->resource.device;
6060     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6061     HRESULT hr;
6062
6063     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6064
6065     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6066     {
6067         if (location == SFLAG_INTEXTURE)
6068         {
6069             struct wined3d_context *context = context_acquire(device, NULL);
6070             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6071             context_release(context);
6072             return WINED3D_OK;
6073         }
6074         else
6075         {
6076             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6077             return WINED3DERR_INVALIDCALL;
6078         }
6079     }
6080
6081     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6082         location = SFLAG_INTEXTURE;
6083
6084     if (surface->flags & location)
6085     {
6086         TRACE("Location already up to date.\n");
6087         return WINED3D_OK;
6088     }
6089
6090     if (WARN_ON(d3d_surface))
6091     {
6092         DWORD required_access = resource_access_from_location(location);
6093         if ((surface->resource.access_flags & required_access) != required_access)
6094             WARN("Operation requires %#x access, but surface only has %#x.\n",
6095                     required_access, surface->resource.access_flags);
6096     }
6097
6098     if (!(surface->flags & SFLAG_LOCATIONS))
6099     {
6100         ERR("Surface %p does not have any up to date location.\n", surface);
6101         surface->flags |= SFLAG_LOST;
6102         return WINED3DERR_DEVICELOST;
6103     }
6104
6105     switch (location)
6106     {
6107         case SFLAG_INSYSMEM:
6108             surface_load_sysmem(surface, gl_info, rect);
6109             break;
6110
6111         case SFLAG_INDRAWABLE:
6112             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6113                 return hr;
6114             break;
6115
6116         case SFLAG_INRB_RESOLVED:
6117             surface_multisample_resolve(surface);
6118             break;
6119
6120         case SFLAG_INTEXTURE:
6121         case SFLAG_INSRGBTEX:
6122             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6123                 return hr;
6124             break;
6125
6126         default:
6127             ERR("Don't know how to handle location %#x.\n", location);
6128             break;
6129     }
6130
6131     if (!rect)
6132     {
6133         surface->flags |= location;
6134
6135         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6136             surface_evict_sysmem(surface);
6137     }
6138
6139     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6140             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6141     {
6142         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6143     }
6144
6145     return WINED3D_OK;
6146 }
6147
6148 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6149 {
6150     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6151
6152     /* Not on a swapchain - must be offscreen */
6153     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6154
6155     /* The front buffer is always onscreen */
6156     if (surface == swapchain->front_buffer) return FALSE;
6157
6158     /* If the swapchain is rendered to an FBO, the backbuffer is
6159      * offscreen, otherwise onscreen */
6160     return swapchain->render_to_fbo;
6161 }
6162
6163 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6166
6167 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6168 /* Context activation is done by the caller. */
6169 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6170 {
6171     BYTE table[256][4];
6172     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6173
6174     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6175
6176     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6177     ENTER_GL();
6178     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6179     LEAVE_GL();
6180 }
6181
6182 /* Context activation is done by the caller. */
6183 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6184 {
6185     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6186
6187     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6188      * else the surface is converted in software at upload time in LoadLocation.
6189      */
6190     if(fixup == COMPLEX_FIXUP_P8 && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6191         ffp_blit_p8_upload_palette(surface, context->gl_info);
6192
6193     ENTER_GL();
6194     glEnable(surface->texture_target);
6195     checkGLcall("glEnable(surface->texture_target)");
6196     LEAVE_GL();
6197     return WINED3D_OK;
6198 }
6199
6200 /* Context activation is done by the caller. */
6201 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6202 {
6203     ENTER_GL();
6204     glDisable(GL_TEXTURE_2D);
6205     checkGLcall("glDisable(GL_TEXTURE_2D)");
6206     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6207     {
6208         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6209         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6210     }
6211     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6212     {
6213         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6214         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6215     }
6216     LEAVE_GL();
6217 }
6218
6219 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6220         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6221         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6222 {
6223     enum complex_fixup src_fixup;
6224
6225     switch (blit_op)
6226     {
6227         case WINED3D_BLIT_OP_COLOR_BLIT:
6228             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6229                 return FALSE;
6230
6231             src_fixup = get_complex_fixup(src_format->color_fixup);
6232             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6233             {
6234                 TRACE("Checking support for fixup:\n");
6235                 dump_color_fixup_desc(src_format->color_fixup);
6236             }
6237
6238             if (!is_identity_fixup(dst_format->color_fixup))
6239             {
6240                 TRACE("Destination fixups are not supported\n");
6241                 return FALSE;
6242             }
6243
6244             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6245             {
6246                 TRACE("P8 fixup supported\n");
6247                 return TRUE;
6248             }
6249
6250             /* We only support identity conversions. */
6251             if (is_identity_fixup(src_format->color_fixup))
6252             {
6253                 TRACE("[OK]\n");
6254                 return TRUE;
6255             }
6256
6257             TRACE("[FAILED]\n");
6258             return FALSE;
6259
6260         case WINED3D_BLIT_OP_COLOR_FILL:
6261             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6262                 return FALSE;
6263
6264             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6265             {
6266                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6267                     return FALSE;
6268             }
6269             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6270             {
6271                 TRACE("Color fill not supported\n");
6272                 return FALSE;
6273             }
6274
6275             /* FIXME: We should reject color fills on formats with fixups,
6276              * but this would break P8 color fills for example. */
6277
6278             return TRUE;
6279
6280         case WINED3D_BLIT_OP_DEPTH_FILL:
6281             return TRUE;
6282
6283         default:
6284             TRACE("Unsupported blit_op=%d\n", blit_op);
6285             return FALSE;
6286     }
6287 }
6288
6289 /* Do not call while under the GL lock. */
6290 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6291         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6292 {
6293     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6294     struct wined3d_fb_state fb = {&dst_surface, NULL};
6295
6296     return device_clear_render_targets(device, 1, &fb,
6297             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6298 }
6299
6300 /* Do not call while under the GL lock. */
6301 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6302         struct wined3d_surface *surface, const RECT *rect, float depth)
6303 {
6304     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6305     struct wined3d_fb_state fb = {NULL, surface};
6306
6307     return device_clear_render_targets(device, 0, &fb,
6308             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6309 }
6310
6311 const struct blit_shader ffp_blit =  {
6312     ffp_blit_alloc,
6313     ffp_blit_free,
6314     ffp_blit_set,
6315     ffp_blit_unset,
6316     ffp_blit_supported,
6317     ffp_blit_color_fill,
6318     ffp_blit_depth_fill,
6319 };
6320
6321 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6322 {
6323     return WINED3D_OK;
6324 }
6325
/* Counterpart of cpu_blit_alloc(); nothing was allocated, so this is a no-op.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6330
6331 /* Context activation is done by the caller. */
6332 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6333 {
6334     return WINED3D_OK;
6335 }
6336
/* Counterpart of cpu_blit_set(); there is no state to undo.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6341
6342 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6343         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6344         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6345 {
6346     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6347     {
6348         return TRUE;
6349     }
6350
6351     return FALSE;
6352 }
6353
6354 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6355         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6356         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6357 {
6358     UINT row_block_count;
6359     const BYTE *src_row;
6360     BYTE *dst_row;
6361     UINT x, y;
6362
6363     src_row = src_data;
6364     dst_row = dst_data;
6365
6366     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6367
6368     if (!flags)
6369     {
6370         for (y = 0; y < update_h; y += format->block_height)
6371         {
6372             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6373             src_row += src_pitch;
6374             dst_row += dst_pitch;
6375         }
6376
6377         return WINED3D_OK;
6378     }
6379
6380     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6381     {
6382         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6383
6384         switch (format->id)
6385         {
6386             case WINED3DFMT_DXT1:
6387                 for (y = 0; y < update_h; y += format->block_height)
6388                 {
6389                     struct block
6390                     {
6391                         WORD color[2];
6392                         BYTE control_row[4];
6393                     };
6394
6395                     const struct block *s = (const struct block *)src_row;
6396                     struct block *d = (struct block *)dst_row;
6397
6398                     for (x = 0; x < row_block_count; ++x)
6399                     {
6400                         d[x].color[0] = s[x].color[0];
6401                         d[x].color[1] = s[x].color[1];
6402                         d[x].control_row[0] = s[x].control_row[3];
6403                         d[x].control_row[1] = s[x].control_row[2];
6404                         d[x].control_row[2] = s[x].control_row[1];
6405                         d[x].control_row[3] = s[x].control_row[0];
6406                     }
6407                     src_row -= src_pitch;
6408                     dst_row += dst_pitch;
6409                 }
6410                 return WINED3D_OK;
6411
6412             case WINED3DFMT_DXT3:
6413                 for (y = 0; y < update_h; y += format->block_height)
6414                 {
6415                     struct block
6416                     {
6417                         WORD alpha_row[4];
6418                         WORD color[2];
6419                         BYTE control_row[4];
6420                     };
6421
6422                     const struct block *s = (const struct block *)src_row;
6423                     struct block *d = (struct block *)dst_row;
6424
6425                     for (x = 0; x < row_block_count; ++x)
6426                     {
6427                         d[x].alpha_row[0] = s[x].alpha_row[3];
6428                         d[x].alpha_row[1] = s[x].alpha_row[2];
6429                         d[x].alpha_row[2] = s[x].alpha_row[1];
6430                         d[x].alpha_row[3] = s[x].alpha_row[0];
6431                         d[x].color[0] = s[x].color[0];
6432                         d[x].color[1] = s[x].color[1];
6433                         d[x].control_row[0] = s[x].control_row[3];
6434                         d[x].control_row[1] = s[x].control_row[2];
6435                         d[x].control_row[2] = s[x].control_row[1];
6436                         d[x].control_row[3] = s[x].control_row[0];
6437                     }
6438                     src_row -= src_pitch;
6439                     dst_row += dst_pitch;
6440                 }
6441                 return WINED3D_OK;
6442
6443             default:
6444                 FIXME("Compressed flip not implemented for format %s.\n",
6445                         debug_d3dformat(format->id));
6446                 return E_NOTIMPL;
6447         }
6448     }
6449
6450     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6451             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6452
6453     return E_NOTIMPL;
6454 }
6455
6456 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6457         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6458         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6459 {
6460     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6461     const struct wined3d_format *src_format, *dst_format;
6462     struct wined3d_surface *orig_src = src_surface;
6463     WINED3DLOCKED_RECT dlock, slock;
6464     HRESULT hr = WINED3D_OK;
6465     const BYTE *sbuf;
6466     RECT xdst,xsrc;
6467     BYTE *dbuf;
6468     int x, y;
6469
6470     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6471             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6472             flags, fx, debug_d3dtexturefiltertype(filter));
6473
6474     xsrc = *src_rect;
6475
6476     if (!src_surface)
6477     {
6478         RECT full_rect;
6479
6480         full_rect.left = 0;
6481         full_rect.top = 0;
6482         full_rect.right = dst_surface->resource.width;
6483         full_rect.bottom = dst_surface->resource.height;
6484         IntersectRect(&xdst, &full_rect, dst_rect);
6485     }
6486     else
6487     {
6488         BOOL clip_horiz, clip_vert;
6489
6490         xdst = *dst_rect;
6491         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6492         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6493
6494         if (clip_vert || clip_horiz)
6495         {
6496             /* Now check if this is a special case or not... */
6497             if ((flags & WINEDDBLT_DDFX)
6498                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6499                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6500             {
6501                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6502                 return WINED3D_OK;
6503             }
6504
6505             if (clip_horiz)
6506             {
6507                 if (xdst.left < 0)
6508                 {
6509                     xsrc.left -= xdst.left;
6510                     xdst.left = 0;
6511                 }
6512                 if (xdst.right > dst_surface->resource.width)
6513                 {
6514                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6515                     xdst.right = (int)dst_surface->resource.width;
6516                 }
6517             }
6518
6519             if (clip_vert)
6520             {
6521                 if (xdst.top < 0)
6522                 {
6523                     xsrc.top -= xdst.top;
6524                     xdst.top = 0;
6525                 }
6526                 if (xdst.bottom > dst_surface->resource.height)
6527                 {
6528                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6529                     xdst.bottom = (int)dst_surface->resource.height;
6530                 }
6531             }
6532
6533             /* And check if after clipping something is still to be done... */
6534             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6535                     || (xdst.left >= (int)dst_surface->resource.width)
6536                     || (xdst.top >= (int)dst_surface->resource.height)
6537                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6538                     || (xsrc.left >= (int)src_surface->resource.width)
6539                     || (xsrc.top >= (int)src_surface->resource.height))
6540             {
6541                 TRACE("Nothing to be done after clipping.\n");
6542                 return WINED3D_OK;
6543             }
6544         }
6545     }
6546
6547     if (src_surface == dst_surface)
6548     {
6549         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6550         slock = dlock;
6551         src_format = dst_surface->resource.format;
6552         dst_format = src_format;
6553     }
6554     else
6555     {
6556         dst_format = dst_surface->resource.format;
6557         if (src_surface)
6558         {
6559             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6560             {
6561                 src_surface = surface_convert_format(src_surface, dst_format->id);
6562                 if (!src_surface)
6563                 {
6564                     /* The conv function writes a FIXME */
6565                     WARN("Cannot convert source surface format to dest format.\n");
6566                     goto release;
6567                 }
6568             }
6569             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6570             src_format = src_surface->resource.format;
6571         }
6572         else
6573         {
6574             src_format = dst_format;
6575         }
6576         if (dst_rect)
6577             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6578         else
6579             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6580     }
6581
6582     bpp = dst_surface->resource.format->byte_count;
6583     srcheight = xsrc.bottom - xsrc.top;
6584     srcwidth = xsrc.right - xsrc.left;
6585     dstheight = xdst.bottom - xdst.top;
6586     dstwidth = xdst.right - xdst.left;
6587     width = (xdst.right - xdst.left) * bpp;
6588
6589     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6590     {
6591         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6592
6593         if (src_surface == dst_surface)
6594         {
6595             FIXME("Only plain blits supported on compressed surfaces.\n");
6596             hr = E_NOTIMPL;
6597             goto release;
6598         }
6599
6600         if (srcheight != dstheight || srcwidth != dstwidth)
6601         {
6602             WARN("Stretching not supported on compressed surfaces.\n");
6603             hr = WINED3DERR_INVALIDCALL;
6604             goto release;
6605         }
6606
6607         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6608         {
6609             WARN("Rectangle not block-aligned.\n");
6610             hr = WINED3DERR_INVALIDCALL;
6611             goto release;
6612         }
6613
6614         hr = surface_cpu_blt_compressed(slock.pBits, dlock.pBits,
6615                 slock.Pitch, dlock.Pitch, dstwidth, dstheight,
6616                 src_format, flags, fx);
6617         goto release;
6618     }
6619
6620     if (dst_rect && src_surface != dst_surface)
6621         dbuf = dlock.pBits;
6622     else
6623         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6624
6625     /* First, all the 'source-less' blits */
6626     if (flags & WINEDDBLT_COLORFILL)
6627     {
6628         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6629         flags &= ~WINEDDBLT_COLORFILL;
6630     }
6631
6632     if (flags & WINEDDBLT_DEPTHFILL)
6633     {
6634         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6635     }
6636     if (flags & WINEDDBLT_ROP)
6637     {
6638         /* Catch some degenerate cases here. */
6639         switch (fx->dwROP)
6640         {
6641             case BLACKNESS:
6642                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6643                 break;
6644             case 0xAA0029: /* No-op */
6645                 break;
6646             case WHITENESS:
6647                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6648                 break;
6649             case SRCCOPY: /* Well, we do that below? */
6650                 break;
6651             default:
6652                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6653                 goto error;
6654         }
6655         flags &= ~WINEDDBLT_ROP;
6656     }
6657     if (flags & WINEDDBLT_DDROPS)
6658     {
6659         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6660     }
6661     /* Now the 'with source' blits. */
6662     if (src_surface)
6663     {
6664         const BYTE *sbase;
6665         int sx, xinc, sy, yinc;
6666
6667         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6668             goto release;
6669
6670         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6671                 && (srcwidth != dstwidth || srcheight != dstheight))
6672         {
6673             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6674             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6675         }
6676
6677         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6678         xinc = (srcwidth << 16) / dstwidth;
6679         yinc = (srcheight << 16) / dstheight;
6680
6681         if (!flags)
6682         {
6683             /* No effects, we can cheat here. */
6684             if (dstwidth == srcwidth)
6685             {
6686                 if (dstheight == srcheight)
6687                 {
6688                     /* No stretching in either direction. This needs to be as
6689                      * fast as possible. */
6690                     sbuf = sbase;
6691
6692                     /* Check for overlapping surfaces. */
6693                     if (src_surface != dst_surface || xdst.top < xsrc.top
6694                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6695                     {
6696                         /* No overlap, or dst above src, so copy from top downwards. */
6697                         for (y = 0; y < dstheight; ++y)
6698                         {
6699                             memcpy(dbuf, sbuf, width);
6700                             sbuf += slock.Pitch;
6701                             dbuf += dlock.Pitch;
6702                         }
6703                     }
6704                     else if (xdst.top > xsrc.top)
6705                     {
6706                         /* Copy from bottom upwards. */
6707                         sbuf += (slock.Pitch*dstheight);
6708                         dbuf += (dlock.Pitch*dstheight);
6709                         for (y = 0; y < dstheight; ++y)
6710                         {
6711                             sbuf -= slock.Pitch;
6712                             dbuf -= dlock.Pitch;
6713                             memcpy(dbuf, sbuf, width);
6714                         }
6715                     }
6716                     else
6717                     {
6718                         /* Src and dst overlapping on the same line, use memmove. */
6719                         for (y = 0; y < dstheight; ++y)
6720                         {
6721                             memmove(dbuf, sbuf, width);
6722                             sbuf += slock.Pitch;
6723                             dbuf += dlock.Pitch;
6724                         }
6725                     }
6726                 }
6727                 else
6728                 {
6729                     /* Stretching in y direction only. */
6730                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6731                     {
6732                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6733                         memcpy(dbuf, sbuf, width);
6734                         dbuf += dlock.Pitch;
6735                     }
6736                 }
6737             }
6738             else
6739             {
6740                 /* Stretching in X direction. */
6741                 int last_sy = -1;
6742                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6743                 {
6744                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6745
6746                     if ((sy >> 16) == (last_sy >> 16))
6747                     {
6748                         /* This source row is the same as last source row -
6749                          * Copy the already stretched row. */
6750                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6751                     }
6752                     else
6753                     {
6754 #define STRETCH_ROW(type) \
6755 do { \
6756     const type *s = (const type *)sbuf; \
6757     type *d = (type *)dbuf; \
6758     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6759         d[x] = s[sx >> 16]; \
6760 } while(0)
6761
6762                         switch(bpp)
6763                         {
6764                             case 1:
6765                                 STRETCH_ROW(BYTE);
6766                                 break;
6767                             case 2:
6768                                 STRETCH_ROW(WORD);
6769                                 break;
6770                             case 4:
6771                                 STRETCH_ROW(DWORD);
6772                                 break;
6773                             case 3:
6774                             {
6775                                 const BYTE *s;
6776                                 BYTE *d = dbuf;
6777                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6778                                 {
6779                                     DWORD pixel;
6780
6781                                     s = sbuf + 3 * (sx >> 16);
6782                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6783                                     d[0] = (pixel      ) & 0xff;
6784                                     d[1] = (pixel >>  8) & 0xff;
6785                                     d[2] = (pixel >> 16) & 0xff;
6786                                     d += 3;
6787                                 }
6788                                 break;
6789                             }
6790                             default:
6791                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6792                                 hr = WINED3DERR_NOTAVAILABLE;
6793                                 goto error;
6794                         }
6795 #undef STRETCH_ROW
6796                     }
6797                     dbuf += dlock.Pitch;
6798                     last_sy = sy;
6799                 }
6800             }
6801         }
6802         else
6803         {
6804             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6805             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6806             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6807             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6808             {
6809                 /* The color keying flags are checked for correctness in ddraw */
6810                 if (flags & WINEDDBLT_KEYSRC)
6811                 {
6812                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6813                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6814                 }
6815                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6816                 {
6817                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6818                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6819                 }
6820
6821                 if (flags & WINEDDBLT_KEYDEST)
6822                 {
6823                     /* Destination color keys are taken from the source surface! */
6824                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6825                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6826                 }
6827                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6828                 {
6829                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6830                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6831                 }
6832
6833                 if (bpp == 1)
6834                 {
6835                     keymask = 0xff;
6836                 }
6837                 else
6838                 {
6839                     keymask = src_format->red_mask
6840                             | src_format->green_mask
6841                             | src_format->blue_mask;
6842                 }
6843                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6844             }
6845
6846             if (flags & WINEDDBLT_DDFX)
6847             {
6848                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6849                 LONG tmpxy;
6850                 dTopLeft     = dbuf;
6851                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6852                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6853                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6854
6855                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6856                 {
6857                     /* I don't think we need to do anything about this flag */
6858                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6859                 }
6860                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6861                 {
6862                     tmp          = dTopRight;
6863                     dTopRight    = dTopLeft;
6864                     dTopLeft     = tmp;
6865                     tmp          = dBottomRight;
6866                     dBottomRight = dBottomLeft;
6867                     dBottomLeft  = tmp;
6868                     dstxinc = dstxinc * -1;
6869                 }
6870                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6871                 {
6872                     tmp          = dTopLeft;
6873                     dTopLeft     = dBottomLeft;
6874                     dBottomLeft  = tmp;
6875                     tmp          = dTopRight;
6876                     dTopRight    = dBottomRight;
6877                     dBottomRight = tmp;
6878                     dstyinc = dstyinc * -1;
6879                 }
6880                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6881                 {
6882                     /* I don't think we need to do anything about this flag */
6883                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6884                 }
6885                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6886                 {
6887                     tmp          = dBottomRight;
6888                     dBottomRight = dTopLeft;
6889                     dTopLeft     = tmp;
6890                     tmp          = dBottomLeft;
6891                     dBottomLeft  = dTopRight;
6892                     dTopRight    = tmp;
6893                     dstxinc = dstxinc * -1;
6894                     dstyinc = dstyinc * -1;
6895                 }
6896                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6897                 {
6898                     tmp          = dTopLeft;
6899                     dTopLeft     = dBottomLeft;
6900                     dBottomLeft  = dBottomRight;
6901                     dBottomRight = dTopRight;
6902                     dTopRight    = tmp;
6903                     tmpxy   = dstxinc;
6904                     dstxinc = dstyinc;
6905                     dstyinc = tmpxy;
6906                     dstxinc = dstxinc * -1;
6907                 }
6908                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6909                 {
6910                     tmp          = dTopLeft;
6911                     dTopLeft     = dTopRight;
6912                     dTopRight    = dBottomRight;
6913                     dBottomRight = dBottomLeft;
6914                     dBottomLeft  = tmp;
6915                     tmpxy   = dstxinc;
6916                     dstxinc = dstyinc;
6917                     dstyinc = tmpxy;
6918                     dstyinc = dstyinc * -1;
6919                 }
6920                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6921                 {
6922                     /* I don't think we need to do anything about this flag */
6923                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6924                 }
6925                 dbuf = dTopLeft;
6926                 flags &= ~(WINEDDBLT_DDFX);
6927             }
6928
6929 #define COPY_COLORKEY_FX(type) \
6930 do { \
6931     const type *s; \
6932     type *d = (type *)dbuf, *dx, tmp; \
6933     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6934     { \
6935         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6936         dx = d; \
6937         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6938         { \
6939             tmp = s[sx >> 16]; \
6940             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6941                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6942             { \
6943                 dx[0] = tmp; \
6944             } \
6945             dx = (type *)(((BYTE *)dx) + dstxinc); \
6946         } \
6947         d = (type *)(((BYTE *)d) + dstyinc); \
6948     } \
6949 } while(0)
6950
6951             switch (bpp)
6952             {
6953                 case 1:
6954                     COPY_COLORKEY_FX(BYTE);
6955                     break;
6956                 case 2:
6957                     COPY_COLORKEY_FX(WORD);
6958                     break;
6959                 case 4:
6960                     COPY_COLORKEY_FX(DWORD);
6961                     break;
6962                 case 3:
6963                 {
6964                     const BYTE *s;
6965                     BYTE *d = dbuf, *dx;
6966                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6967                     {
6968                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6969                         dx = d;
6970                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
6971                         {
6972                             DWORD pixel, dpixel = 0;
6973                             s = sbuf + 3 * (sx>>16);
6974                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6975                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
6976                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
6977                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
6978                             {
6979                                 dx[0] = (pixel      ) & 0xff;
6980                                 dx[1] = (pixel >>  8) & 0xff;
6981                                 dx[2] = (pixel >> 16) & 0xff;
6982                             }
6983                             dx += dstxinc;
6984                         }
6985                         d += dstyinc;
6986                     }
6987                     break;
6988                 }
6989                 default:
6990                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
6991                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
6992                     hr = WINED3DERR_NOTAVAILABLE;
6993                     goto error;
6994 #undef COPY_COLORKEY_FX
6995             }
6996         }
6997     }
6998
6999 error:
7000     if (flags && FIXME_ON(d3d_surface))
7001     {
7002         FIXME("\tUnsupported flags: %#x.\n", flags);
7003     }
7004
7005 release:
7006     wined3d_surface_unmap(dst_surface);
7007     if (src_surface && src_surface != dst_surface)
7008         wined3d_surface_unmap(src_surface);
7009     /* Release the converted surface, if any. */
7010     if (src_surface && src_surface != orig_src)
7011         wined3d_surface_decref(src_surface);
7012
7013     return hr;
7014 }
7015
7016 /* Do not call while under the GL lock. */
7017 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7018         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7019 {
7020     static const RECT src_rect;
7021     WINEDDBLTFX BltFx;
7022
7023     memset(&BltFx, 0, sizeof(BltFx));
7024     BltFx.dwSize = sizeof(BltFx);
7025     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7026     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7027             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7028 }
7029
7030 /* Do not call while under the GL lock. */
7031 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7032         struct wined3d_surface *surface, const RECT *rect, float depth)
7033 {
7034     FIXME("Depth filling not implemented by cpu_blit.\n");
7035     return WINED3DERR_INVALIDCALL;
7036 }
7037
7038 const struct blit_shader cpu_blit =  {
7039     cpu_blit_alloc,
7040     cpu_blit_free,
7041     cpu_blit_set,
7042     cpu_blit_unset,
7043     cpu_blit_supported,
7044     cpu_blit_color_fill,
7045     cpu_blit_depth_fill,
7046 };
7047
7048 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7049         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7050         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7051         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7052 {
7053     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7054     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7055     unsigned int resource_size;
7056     HRESULT hr;
7057
7058     if (multisample_quality > 0)
7059     {
7060         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7061         multisample_quality = 0;
7062     }
7063
7064     /* Quick lockable sanity check.
7065      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7066      * this function is too deep to need to care about things like this.
7067      * Levels need to be checked too, since they all affect what can be done. */
7068     switch (pool)
7069     {
7070         case WINED3DPOOL_SCRATCH:
7071             if (!lockable)
7072             {
7073                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7074                         "which are mutually exclusive, setting lockable to TRUE.\n");
7075                 lockable = TRUE;
7076             }
7077             break;
7078
7079         case WINED3DPOOL_SYSTEMMEM:
7080             if (!lockable)
7081                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7082             break;
7083
7084         case WINED3DPOOL_MANAGED:
7085             if (usage & WINED3DUSAGE_DYNAMIC)
7086                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7087             break;
7088
7089         case WINED3DPOOL_DEFAULT:
7090             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7091                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7092             break;
7093
7094         default:
7095             FIXME("Unknown pool %#x.\n", pool);
7096             break;
7097     };
7098
7099     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7100         FIXME("Trying to create a render target that isn't in the default pool.\n");
7101
7102     /* FIXME: Check that the format is supported by the device. */
7103
7104     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7105     if (!resource_size)
7106         return WINED3DERR_INVALIDCALL;
7107
7108     surface->surface_type = surface_type;
7109
7110     switch (surface_type)
7111     {
7112         case SURFACE_OPENGL:
7113             surface->surface_ops = &surface_ops;
7114             break;
7115
7116         case SURFACE_GDI:
7117             surface->surface_ops = &gdi_surface_ops;
7118             break;
7119
7120         default:
7121             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7122             return WINED3DERR_INVALIDCALL;
7123     }
7124
7125     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7126             multisample_type, multisample_quality, usage, pool, width, height, 1,
7127             resource_size, parent, parent_ops, &surface_resource_ops);
7128     if (FAILED(hr))
7129     {
7130         WARN("Failed to initialize resource, returning %#x.\n", hr);
7131         return hr;
7132     }
7133
7134     /* "Standalone" surface. */
7135     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7136
7137     surface->texture_level = level;
7138     list_init(&surface->overlays);
7139
7140     /* Flags */
7141     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7142     if (discard)
7143         surface->flags |= SFLAG_DISCARD;
7144     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7145         surface->flags |= SFLAG_LOCKABLE;
7146     /* I'm not sure if this qualifies as a hack or as an optimization. It
7147      * seems reasonable to assume that lockable render targets will get
7148      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7149      * creation. However, the other reason we want to do this is that several
7150      * ddraw applications access surface memory while the surface isn't
7151      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7152      * future locks prevents these from crashing. */
7153     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7154         surface->flags |= SFLAG_DYNLOCK;
7155
7156     /* Mark the texture as dirty so that it gets loaded first time around. */
7157     surface_add_dirty_rect(surface, NULL);
7158     list_init(&surface->renderbuffers);
7159
7160     TRACE("surface %p, memory %p, size %u\n",
7161             surface, surface->resource.allocatedMemory, surface->resource.size);
7162
7163     /* Call the private setup routine */
7164     hr = surface->surface_ops->surface_private_setup(surface);
7165     if (FAILED(hr))
7166     {
7167         ERR("Private setup failed, returning %#x\n", hr);
7168         surface->surface_ops->surface_cleanup(surface);
7169         return hr;
7170     }
7171
7172     return hr;
7173 }
7174
7175 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7176         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7177         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7178         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7179 {
7180     struct wined3d_surface *object;
7181     HRESULT hr;
7182
7183     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7184             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7185     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7186             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7187     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7188
7189     if (surface_type == SURFACE_OPENGL && !device->adapter)
7190     {
7191         ERR("OpenGL surfaces are not available without OpenGL.\n");
7192         return WINED3DERR_NOTAVAILABLE;
7193     }
7194
7195     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7196     if (!object)
7197     {
7198         ERR("Failed to allocate surface memory.\n");
7199         return WINED3DERR_OUTOFVIDEOMEMORY;
7200     }
7201
7202     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7203             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7204     if (FAILED(hr))
7205     {
7206         WARN("Failed to initialize surface, returning %#x.\n", hr);
7207         HeapFree(GetProcessHeap(), 0, object);
7208         return hr;
7209     }
7210
7211     TRACE("Created surface %p.\n", object);
7212     *surface = object;
7213
7214     return hr;
7215 }