wined3d: Rename WineD3DAdapterChangeGLRam() to adapter_adjust_memory().
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     TRACE("surface %p.\n", surface);
46
47     if (surface->texture_name || (surface->flags & SFLAG_PBO)
48              || surface->rb_multisample || surface->rb_resolved
49              || !list_empty(&surface->renderbuffers))
50     {
51         struct wined3d_renderbuffer_entry *entry, *entry2;
52         const struct wined3d_gl_info *gl_info;
53         struct wined3d_context *context;
54
55         context = context_acquire(surface->resource.device, NULL);
56         gl_info = context->gl_info;
57
58         ENTER_GL();
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         if (surface->rb_multisample)
73         {
74             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
76         }
77
78         if (surface->rb_resolved)
79         {
80             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
81             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
82         }
83
84         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
85         {
86             TRACE("Deleting renderbuffer %u.\n", entry->id);
87             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
88             HeapFree(GetProcessHeap(), 0, entry);
89         }
90
91         LEAVE_GL();
92
93         context_release(context);
94     }
95
96     if (surface->flags & SFLAG_DIBSECTION)
97     {
98         /* Release the DC. */
99         SelectObject(surface->hDC, surface->dib.holdbitmap);
100         DeleteDC(surface->hDC);
101         /* Release the DIB section. */
102         DeleteObject(surface->dib.DIBsection);
103         surface->dib.bitmap_data = NULL;
104         surface->resource.allocatedMemory = NULL;
105     }
106
107     if (surface->flags & SFLAG_USERPTR)
108         wined3d_surface_set_mem(surface, NULL);
109     if (surface->overlay_dest)
110         list_remove(&surface->overlay_entry);
111
112     HeapFree(GetProcessHeap(), 0, surface->palette9);
113
114     resource_cleanup(&surface->resource);
115 }
116
117 void surface_update_draw_binding(struct wined3d_surface *surface)
118 {
119     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
120         surface->draw_binding = SFLAG_INDRAWABLE;
121     else if (surface->resource.multisample_type)
122         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
123     else
124         surface->draw_binding = SFLAG_INTEXTURE;
125 }
126
127 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
128 {
129     TRACE("surface %p, container %p.\n", surface, container);
130
131     if (!container && type != WINED3D_CONTAINER_NONE)
132         ERR("Setting NULL container of type %#x.\n", type);
133
134     if (type == WINED3D_CONTAINER_SWAPCHAIN)
135     {
136         surface->get_drawable_size = get_drawable_size_swapchain;
137     }
138     else
139     {
140         switch (wined3d_settings.offscreen_rendering_mode)
141         {
142             case ORM_FBO:
143                 surface->get_drawable_size = get_drawable_size_fbo;
144                 break;
145
146             case ORM_BACKBUFFER:
147                 surface->get_drawable_size = get_drawable_size_backbuffer;
148                 break;
149
150             default:
151                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
152                 return;
153         }
154     }
155
156     surface->container.type = type;
157     surface->container.u.base = container;
158     surface_update_draw_binding(surface);
159 }
160
161 struct blt_info
162 {
163     GLenum binding;
164     GLenum bind_target;
165     enum tex_types tex_type;
166     GLfloat coords[4][3];
167 };
168
/* Floating point rectangle: left, top, right and bottom edges. */
struct float_rect
{
    float l, t, r, b;
};
176
177 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
178 {
179     f->l = ((r->left * 2.0f) / w) - 1.0f;
180     f->t = ((r->top * 2.0f) / h) - 1.0f;
181     f->r = ((r->right * 2.0f) / w) - 1.0f;
182     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
183 }
184
185 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
186 {
187     GLfloat (*coords)[3] = info->coords;
188     struct float_rect f;
189
190     switch (target)
191     {
192         default:
193             FIXME("Unsupported texture target %#x\n", target);
194             /* Fall back to GL_TEXTURE_2D */
195         case GL_TEXTURE_2D:
196             info->binding = GL_TEXTURE_BINDING_2D;
197             info->bind_target = GL_TEXTURE_2D;
198             info->tex_type = tex_2d;
199             coords[0][0] = (float)rect->left / w;
200             coords[0][1] = (float)rect->top / h;
201             coords[0][2] = 0.0f;
202
203             coords[1][0] = (float)rect->right / w;
204             coords[1][1] = (float)rect->top / h;
205             coords[1][2] = 0.0f;
206
207             coords[2][0] = (float)rect->left / w;
208             coords[2][1] = (float)rect->bottom / h;
209             coords[2][2] = 0.0f;
210
211             coords[3][0] = (float)rect->right / w;
212             coords[3][1] = (float)rect->bottom / h;
213             coords[3][2] = 0.0f;
214             break;
215
216         case GL_TEXTURE_RECTANGLE_ARB:
217             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
218             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
219             info->tex_type = tex_rect;
220             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
221             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
222             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
223             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
224             break;
225
226         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
227             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
228             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
229             info->tex_type = tex_cube;
230             cube_coords_float(rect, w, h, &f);
231
232             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
233             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
234             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
235             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
236             break;
237
238         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
239             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
240             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
241             info->tex_type = tex_cube;
242             cube_coords_float(rect, w, h, &f);
243
244             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
245             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
246             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
247             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
248             break;
249
250         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
251             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
252             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
253             info->tex_type = tex_cube;
254             cube_coords_float(rect, w, h, &f);
255
256             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
257             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
258             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
259             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
260             break;
261
262         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
263             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
264             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
265             info->tex_type = tex_cube;
266             cube_coords_float(rect, w, h, &f);
267
268             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
269             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
270             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
271             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
272             break;
273
274         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
275             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
276             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
277             info->tex_type = tex_cube;
278             cube_coords_float(rect, w, h, &f);
279
280             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
281             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
282             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
283             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
284             break;
285
286         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
287             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
288             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
289             info->tex_type = tex_cube;
290             cube_coords_float(rect, w, h, &f);
291
292             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
293             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
294             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
295             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
296             break;
297     }
298 }
299
300 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
301 {
302     if (rect_in)
303         *rect_out = *rect_in;
304     else
305     {
306         rect_out->left = 0;
307         rect_out->top = 0;
308         rect_out->right = surface->resource.width;
309         rect_out->bottom = surface->resource.height;
310     }
311 }
312
313 /* GL locking and context activation is done by the caller */
314 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
315         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
316 {
317     struct blt_info info;
318
319     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
320
321     glEnable(info.bind_target);
322     checkGLcall("glEnable(bind_target)");
323
324     context_bind_texture(context, info.bind_target, src_surface->texture_name);
325
326     /* Filtering for StretchRect */
327     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
328             wined3d_gl_mag_filter(magLookup, Filter));
329     checkGLcall("glTexParameteri");
330     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
331             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
334     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
335     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
336     checkGLcall("glTexEnvi");
337
338     /* Draw a quad */
339     glBegin(GL_TRIANGLE_STRIP);
340     glTexCoord3fv(info.coords[0]);
341     glVertex2i(dst_rect->left, dst_rect->top);
342
343     glTexCoord3fv(info.coords[1]);
344     glVertex2i(dst_rect->right, dst_rect->top);
345
346     glTexCoord3fv(info.coords[2]);
347     glVertex2i(dst_rect->left, dst_rect->bottom);
348
349     glTexCoord3fv(info.coords[3]);
350     glVertex2i(dst_rect->right, dst_rect->bottom);
351     glEnd();
352
353     /* Unbind the texture */
354     context_bind_texture(context, info.bind_target, 0);
355
356     /* We changed the filtering settings on the texture. Inform the
357      * container about this to get the filters reset properly next draw. */
358     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
359     {
360         struct wined3d_texture *texture = src_surface->container.u.texture;
361         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
362         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
363         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
364     }
365 }
366
367 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
368 {
369     const struct wined3d_format *format = surface->resource.format;
370     SYSTEM_INFO sysInfo;
371     BITMAPINFO *b_info;
372     int extraline = 0;
373     DWORD *masks;
374     UINT usage;
375     HDC dc;
376
377     TRACE("surface %p.\n", surface);
378
379     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
380     {
381         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
382         return WINED3DERR_INVALIDCALL;
383     }
384
385     switch (format->byte_count)
386     {
387         case 2:
388         case 4:
389             /* Allocate extra space to store the RGB bit masks. */
390             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
391             break;
392
393         case 3:
394             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
395             break;
396
397         default:
398             /* Allocate extra space for a palette. */
399             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
400                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
401             break;
402     }
403
404     if (!b_info)
405         return E_OUTOFMEMORY;
406
407     /* Some applications access the surface in via DWORDs, and do not take
408      * the necessary care at the end of the surface. So we need at least
409      * 4 extra bytes at the end of the surface. Check against the page size,
410      * if the last page used for the surface has at least 4 spare bytes we're
411      * safe, otherwise add an extra line to the DIB section. */
412     GetSystemInfo(&sysInfo);
413     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
414     {
415         extraline = 1;
416         TRACE("Adding an extra line to the DIB section.\n");
417     }
418
419     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
420     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
421     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
422     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
423     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
424             * wined3d_surface_get_pitch(surface);
425     b_info->bmiHeader.biPlanes = 1;
426     b_info->bmiHeader.biBitCount = format->byte_count * 8;
427
428     b_info->bmiHeader.biXPelsPerMeter = 0;
429     b_info->bmiHeader.biYPelsPerMeter = 0;
430     b_info->bmiHeader.biClrUsed = 0;
431     b_info->bmiHeader.biClrImportant = 0;
432
433     /* Get the bit masks */
434     masks = (DWORD *)b_info->bmiColors;
435     switch (surface->resource.format->id)
436     {
437         case WINED3DFMT_B8G8R8_UNORM:
438             usage = DIB_RGB_COLORS;
439             b_info->bmiHeader.biCompression = BI_RGB;
440             break;
441
442         case WINED3DFMT_B5G5R5X1_UNORM:
443         case WINED3DFMT_B5G5R5A1_UNORM:
444         case WINED3DFMT_B4G4R4A4_UNORM:
445         case WINED3DFMT_B4G4R4X4_UNORM:
446         case WINED3DFMT_B2G3R3_UNORM:
447         case WINED3DFMT_B2G3R3A8_UNORM:
448         case WINED3DFMT_R10G10B10A2_UNORM:
449         case WINED3DFMT_R8G8B8A8_UNORM:
450         case WINED3DFMT_R8G8B8X8_UNORM:
451         case WINED3DFMT_B10G10R10A2_UNORM:
452         case WINED3DFMT_B5G6R5_UNORM:
453         case WINED3DFMT_R16G16B16A16_UNORM:
454             usage = 0;
455             b_info->bmiHeader.biCompression = BI_BITFIELDS;
456             masks[0] = format->red_mask;
457             masks[1] = format->green_mask;
458             masks[2] = format->blue_mask;
459             break;
460
461         default:
462             /* Don't know palette */
463             b_info->bmiHeader.biCompression = BI_RGB;
464             usage = 0;
465             break;
466     }
467
468     if (!(dc = GetDC(0)))
469     {
470         HeapFree(GetProcessHeap(), 0, b_info);
471         return HRESULT_FROM_WIN32(GetLastError());
472     }
473
474     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
475             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
476             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
477     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
478     ReleaseDC(0, dc);
479
480     if (!surface->dib.DIBsection)
481     {
482         ERR("Failed to create DIB section.\n");
483         HeapFree(GetProcessHeap(), 0, b_info);
484         return HRESULT_FROM_WIN32(GetLastError());
485     }
486
487     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
488     /* Copy the existing surface to the dib section. */
489     if (surface->resource.allocatedMemory)
490     {
491         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
492                 surface->resource.height * wined3d_surface_get_pitch(surface));
493     }
494     else
495     {
496         /* This is to make maps read the GL texture although memory is allocated. */
497         surface->flags &= ~SFLAG_INSYSMEM;
498     }
499     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
500
501     HeapFree(GetProcessHeap(), 0, b_info);
502
503     /* Now allocate a DC. */
504     surface->hDC = CreateCompatibleDC(0);
505     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
506     TRACE("Using wined3d palette %p.\n", surface->palette);
507     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
508
509     surface->flags |= SFLAG_DIBSECTION;
510
511     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
512     surface->resource.heapMemory = NULL;
513
514     return WINED3D_OK;
515 }
516
517 static void surface_prepare_system_memory(struct wined3d_surface *surface)
518 {
519     struct wined3d_device *device = surface->resource.device;
520     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
521
522     TRACE("surface %p.\n", surface);
523
524     /* Performance optimization: Count how often a surface is locked, if it is
525      * locked regularly do not throw away the system memory copy. This avoids
526      * the need to download the surface from OpenGL all the time. The surface
527      * is still downloaded if the OpenGL texture is changed. */
528     if (!(surface->flags & SFLAG_DYNLOCK))
529     {
530         if (++surface->lockCount > MAXLOCKCOUNT)
531         {
532             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
533             surface->flags |= SFLAG_DYNLOCK;
534         }
535     }
536
537     /* Create a PBO for dynamically locked surfaces but don't do it for
538      * converted or NPOT surfaces. Also don't create a PBO for systemmem
539      * surfaces. */
540     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
541             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
542             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
543     {
544         struct wined3d_context *context;
545         GLenum error;
546
547         context = context_acquire(device, NULL);
548         ENTER_GL();
549
550         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
551         error = glGetError();
552         if (!surface->pbo || error != GL_NO_ERROR)
553             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
554
555         TRACE("Binding PBO %u.\n", surface->pbo);
556
557         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
558         checkGLcall("glBindBufferARB");
559
560         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
561                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
562         checkGLcall("glBufferDataARB");
563
564         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
565         checkGLcall("glBindBufferARB");
566
567         /* We don't need the system memory anymore and we can't even use it for PBOs. */
568         if (!(surface->flags & SFLAG_CLIENT))
569         {
570             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
571             surface->resource.heapMemory = NULL;
572         }
573         surface->resource.allocatedMemory = NULL;
574         surface->flags |= SFLAG_PBO;
575         LEAVE_GL();
576         context_release(context);
577     }
578     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
579     {
580         /* Whatever surface we have, make sure that there is memory allocated
581          * for the downloaded copy, or a PBO to map. */
582         if (!surface->resource.heapMemory)
583             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
584
585         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
586                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
587
588         if (surface->flags & SFLAG_INSYSMEM)
589             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
590     }
591 }
592
593 static void surface_evict_sysmem(struct wined3d_surface *surface)
594 {
595     if (surface->flags & SFLAG_DONOTFREE)
596         return;
597
598     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
599     surface->resource.allocatedMemory = NULL;
600     surface->resource.heapMemory = NULL;
601     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
602 }
603
604 /* Context activation is done by the caller. */
605 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
606         struct wined3d_context *context, BOOL srgb)
607 {
608     struct wined3d_device *device = surface->resource.device;
609     DWORD active_sampler;
610
611     /* We don't need a specific texture unit, but after binding the texture
612      * the current unit is dirty. Read the unit back instead of switching to
613      * 0, this avoids messing around with the state manager's GL states. The
614      * current texture unit should always be a valid one.
615      *
616      * To be more specific, this is tricky because we can implicitly be
617      * called from sampler() in state.c. This means we can't touch anything
618      * other than whatever happens to be the currently active texture, or we
619      * would risk marking already applied sampler states dirty again. */
620     active_sampler = device->rev_tex_unit_map[context->active_texture];
621
622     if (active_sampler != WINED3D_UNMAPPED_STAGE)
623         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
624     surface_bind(surface, context, srgb);
625 }
626
627 static void surface_force_reload(struct wined3d_surface *surface)
628 {
629     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
630 }
631
632 static void surface_release_client_storage(struct wined3d_surface *surface)
633 {
634     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
635
636     ENTER_GL();
637     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
638     if (surface->texture_name)
639     {
640         surface_bind_and_dirtify(surface, context, FALSE);
641         glTexImage2D(surface->texture_target, surface->texture_level,
642                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
643     }
644     if (surface->texture_name_srgb)
645     {
646         surface_bind_and_dirtify(surface, context, TRUE);
647         glTexImage2D(surface->texture_target, surface->texture_level,
648                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
649     }
650     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
651     LEAVE_GL();
652
653     context_release(context);
654
655     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
656     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
657     surface_force_reload(surface);
658 }
659
660 static HRESULT surface_private_setup(struct wined3d_surface *surface)
661 {
662     /* TODO: Check against the maximum texture sizes supported by the video card. */
663     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
664     unsigned int pow2Width, pow2Height;
665
666     TRACE("surface %p.\n", surface);
667
668     surface->texture_name = 0;
669     surface->texture_target = GL_TEXTURE_2D;
670
671     /* Non-power2 support */
672     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
673     {
674         pow2Width = surface->resource.width;
675         pow2Height = surface->resource.height;
676     }
677     else
678     {
679         /* Find the nearest pow2 match */
680         pow2Width = pow2Height = 1;
681         while (pow2Width < surface->resource.width)
682             pow2Width <<= 1;
683         while (pow2Height < surface->resource.height)
684             pow2Height <<= 1;
685     }
686     surface->pow2Width = pow2Width;
687     surface->pow2Height = pow2Height;
688
689     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
690     {
691         /* TODO: Add support for non power two compressed textures. */
692         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
693         {
694             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
695                   surface, surface->resource.width, surface->resource.height);
696             return WINED3DERR_NOTAVAILABLE;
697         }
698     }
699
700     if (pow2Width != surface->resource.width
701             || pow2Height != surface->resource.height)
702     {
703         surface->flags |= SFLAG_NONPOW2;
704     }
705
706     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
707             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
708     {
709         /* One of three options:
710          * 1: Do the same as we do with NPOT and scale the texture, (any
711          *    texture ops would require the texture to be scaled which is
712          *    potentially slow)
713          * 2: Set the texture to the maximum size (bad idea).
714          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
715          * 4: Create the surface, but allow it to be used only for DirectDraw
716          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
717          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
718          *    the render target. */
719         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
720         {
721             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
722             return WINED3DERR_NOTAVAILABLE;
723         }
724
725         /* We should never use this surface in combination with OpenGL! */
726         TRACE("Creating an oversized surface: %ux%u.\n",
727                 surface->pow2Width, surface->pow2Height);
728     }
729     else
730     {
731         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
732          * and EXT_PALETTED_TEXTURE is used in combination with texture
733          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
734          * EXT_PALETTED_TEXTURE doesn't work in combination with
735          * ARB_TEXTURE_RECTANGLE. */
736         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
737                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
738                 && gl_info->supported[EXT_PALETTED_TEXTURE]
739                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
740         {
741             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
742             surface->pow2Width = surface->resource.width;
743             surface->pow2Height = surface->resource.height;
744             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
745         }
746     }
747
748     switch (wined3d_settings.offscreen_rendering_mode)
749     {
750         case ORM_FBO:
751             surface->get_drawable_size = get_drawable_size_fbo;
752             break;
753
754         case ORM_BACKBUFFER:
755             surface->get_drawable_size = get_drawable_size_backbuffer;
756             break;
757
758         default:
759             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
760             return WINED3DERR_INVALIDCALL;
761     }
762
763     surface->flags |= SFLAG_INSYSMEM;
764
765     return WINED3D_OK;
766 }
767
768 static void surface_realize_palette(struct wined3d_surface *surface)
769 {
770     struct wined3d_palette *palette = surface->palette;
771
772     TRACE("surface %p.\n", surface);
773
774     if (!palette) return;
775
776     if (surface->resource.format->id == WINED3DFMT_P8_UINT
777             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
778     {
779         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
780         {
781             /* Make sure the texture is up to date. This call doesn't do
782              * anything if the texture is already up to date. */
783             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
784
785             /* We want to force a palette refresh, so mark the drawable as not being up to date */
786             if (!surface_is_offscreen(surface))
787                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
788         }
789         else
790         {
791             if (!(surface->flags & SFLAG_INSYSMEM))
792             {
793                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
794                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
795             }
796             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
797         }
798     }
799
800     if (surface->flags & SFLAG_DIBSECTION)
801     {
802         RGBQUAD col[256];
803         unsigned int i;
804
805         TRACE("Updating the DC's palette.\n");
806
807         for (i = 0; i < 256; ++i)
808         {
809             col[i].rgbRed   = palette->palents[i].peRed;
810             col[i].rgbGreen = palette->palents[i].peGreen;
811             col[i].rgbBlue  = palette->palents[i].peBlue;
812             col[i].rgbReserved = 0;
813         }
814         SetDIBColorTable(surface->hDC, 0, 256, col);
815     }
816
817     /* Propagate the changes to the drawable when we have a palette. */
818     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
819         surface_load_location(surface, surface->draw_binding, NULL);
820 }
821
822 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
823 {
824     HRESULT hr;
825
826     /* If there's no destination surface there is nothing to do. */
827     if (!surface->overlay_dest)
828         return WINED3D_OK;
829
830     /* Blt calls ModifyLocation on the dest surface, which in turn calls
831      * DrawOverlay to update the overlay. Prevent an endless recursion. */
832     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
833         return WINED3D_OK;
834
835     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
836     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
837             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
838     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
839
840     return hr;
841 }
842
843 static void surface_preload(struct wined3d_surface *surface)
844 {
845     TRACE("surface %p.\n", surface);
846
847     surface_internal_preload(surface, SRGB_ANY);
848 }
849
850 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
851 {
852     struct wined3d_device *device = surface->resource.device;
853     const RECT *pass_rect = rect;
854
855     TRACE("surface %p, rect %s, flags %#x.\n",
856             surface, wine_dbgstr_rect(rect), flags);
857
858     if (flags & WINED3DLOCK_DISCARD)
859     {
860         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
861         surface_prepare_system_memory(surface);
862         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
863     }
864     else
865     {
866         /* surface_load_location() does not check if the rectangle specifies
867          * the full surface. Most callers don't need that, so do it here. */
868         if (rect && !rect->top && !rect->left
869                 && rect->right == surface->resource.width
870                 && rect->bottom == surface->resource.height)
871             pass_rect = NULL;
872
873         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
874                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
875                 || surface == device->fb.render_targets[0])))
876             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
877     }
878
879     if (surface->flags & SFLAG_PBO)
880     {
881         const struct wined3d_gl_info *gl_info;
882         struct wined3d_context *context;
883
884         context = context_acquire(device, NULL);
885         gl_info = context->gl_info;
886
887         ENTER_GL();
888         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
889         checkGLcall("glBindBufferARB");
890
891         /* This shouldn't happen but could occur if some other function
892          * didn't handle the PBO properly. */
893         if (surface->resource.allocatedMemory)
894             ERR("The surface already has PBO memory allocated.\n");
895
896         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
897         checkGLcall("glMapBufferARB");
898
899         /* Make sure the PBO isn't set anymore in order not to break non-PBO
900          * calls. */
901         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
902         checkGLcall("glBindBufferARB");
903
904         LEAVE_GL();
905         context_release(context);
906     }
907
908     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
909     {
910         if (!rect)
911             surface_add_dirty_rect(surface, NULL);
912         else
913         {
914             WINED3DBOX b;
915
916             b.Left = rect->left;
917             b.Top = rect->top;
918             b.Right = rect->right;
919             b.Bottom = rect->bottom;
920             b.Front = 0;
921             b.Back = 1;
922             surface_add_dirty_rect(surface, &b);
923         }
924     }
925 }
926
927 static void surface_unmap(struct wined3d_surface *surface)
928 {
929     struct wined3d_device *device = surface->resource.device;
930     BOOL fullsurface;
931
932     TRACE("surface %p.\n", surface);
933
934     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
935
936     if (surface->flags & SFLAG_PBO)
937     {
938         const struct wined3d_gl_info *gl_info;
939         struct wined3d_context *context;
940
941         TRACE("Freeing PBO memory.\n");
942
943         context = context_acquire(device, NULL);
944         gl_info = context->gl_info;
945
946         ENTER_GL();
947         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
948         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
949         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
950         checkGLcall("glUnmapBufferARB");
951         LEAVE_GL();
952         context_release(context);
953
954         surface->resource.allocatedMemory = NULL;
955     }
956
957     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
958
959     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
960     {
961         TRACE("Not dirtified, nothing to do.\n");
962         goto done;
963     }
964
965     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
966             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
967     {
968         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
969         {
970             static BOOL warned = FALSE;
971             if (!warned)
972             {
973                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
974                 warned = TRUE;
975             }
976             goto done;
977         }
978
979         if (!surface->dirtyRect.left && !surface->dirtyRect.top
980                 && surface->dirtyRect.right == surface->resource.width
981                 && surface->dirtyRect.bottom == surface->resource.height)
982         {
983             fullsurface = TRUE;
984         }
985         else
986         {
987             /* TODO: Proper partial rectangle tracking. */
988             fullsurface = FALSE;
989             surface->flags |= SFLAG_INSYSMEM;
990         }
991
992         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
993
994         /* Partial rectangle tracking is not commonly implemented, it is only
995          * done for render targets. INSYSMEM was set before to tell
996          * surface_load_location() where to read the rectangle from.
997          * Indrawable is set because all modifications from the partial
998          * sysmem copy are written back to the drawable, thus the surface is
999          * merged again in the drawable. The sysmem copy is not fully up to
1000          * date because only a subrectangle was read in Map(). */
1001         if (!fullsurface)
1002         {
1003             surface_modify_location(surface, SFLAG_INDRAWABLE, TRUE);
1004             surface_evict_sysmem(surface);
1005         }
1006
1007         surface->dirtyRect.left = surface->resource.width;
1008         surface->dirtyRect.top = surface->resource.height;
1009         surface->dirtyRect.right = 0;
1010         surface->dirtyRect.bottom = 0;
1011     }
1012     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1013     {
1014         FIXME("Depth / stencil buffer locking is not implemented.\n");
1015     }
1016
1017 done:
1018     /* Overlays have to be redrawn manually after changes with the GL implementation */
1019     if (surface->overlay_dest)
1020         surface->surface_ops->surface_draw_overlay(surface);
1021 }
1022
1023 static HRESULT surface_getdc(struct wined3d_surface *surface)
1024 {
1025     WINED3DLOCKED_RECT lock;
1026     HRESULT hr;
1027
1028     TRACE("surface %p.\n", surface);
1029
1030     /* Create a DIB section if there isn't a dc yet. */
1031     if (!surface->hDC)
1032     {
1033         if (surface->flags & SFLAG_CLIENT)
1034         {
1035             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1036             surface_release_client_storage(surface);
1037         }
1038         hr = surface_create_dib_section(surface);
1039         if (FAILED(hr))
1040             return WINED3DERR_INVALIDCALL;
1041
1042         /* Use the DIB section from now on if we are not using a PBO. */
1043         if (!(surface->flags & SFLAG_PBO))
1044             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1045     }
1046
1047     /* Map the surface. */
1048     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1049     if (FAILED(hr))
1050         ERR("Map failed, hr %#x.\n", hr);
1051
1052     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1053      * activates the allocatedMemory. */
1054     if (surface->flags & SFLAG_PBO)
1055         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->dib.bitmap_size);
1056
1057     return hr;
1058 }
1059
1060 static HRESULT surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1061 {
1062     TRACE("surface %p, override %p.\n", surface, override);
1063
1064     /* Flipping is only supported on render targets and overlays. */
1065     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
1066     {
1067         WARN("Tried to flip a non-render target, non-overlay surface.\n");
1068         return WINEDDERR_NOTFLIPPABLE;
1069     }
1070
1071     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1072     {
1073         flip_surface(surface, override);
1074
1075         /* Update the overlay if it is visible */
1076         if (surface->overlay_dest)
1077             return surface->surface_ops->surface_draw_overlay(surface);
1078         else
1079             return WINED3D_OK;
1080     }
1081
1082     return WINED3D_OK;
1083 }
1084
1085 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1086 {
1087     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1088         return FALSE;
1089     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1090         return FALSE;
1091     return TRUE;
1092 }
1093
1094 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1095         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1096 {
1097     const struct wined3d_gl_info *gl_info;
1098     struct wined3d_context *context;
1099     DWORD src_mask, dst_mask;
1100     GLbitfield gl_mask;
1101
1102     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1103             device, src_surface, wine_dbgstr_rect(src_rect),
1104             dst_surface, wine_dbgstr_rect(dst_rect));
1105
1106     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1107     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1108
1109     if (src_mask != dst_mask)
1110     {
1111         ERR("Incompatible formats %s and %s.\n",
1112                 debug_d3dformat(src_surface->resource.format->id),
1113                 debug_d3dformat(dst_surface->resource.format->id));
1114         return;
1115     }
1116
1117     if (!src_mask)
1118     {
1119         ERR("Not a depth / stencil format: %s.\n",
1120                 debug_d3dformat(src_surface->resource.format->id));
1121         return;
1122     }
1123
1124     gl_mask = 0;
1125     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1126         gl_mask |= GL_DEPTH_BUFFER_BIT;
1127     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1128         gl_mask |= GL_STENCIL_BUFFER_BIT;
1129
1130     /* Make sure the locations are up-to-date. Loading the destination
1131      * surface isn't required if the entire surface is overwritten. */
1132     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1133     if (!surface_is_full_rect(dst_surface, dst_rect))
1134         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1135
1136     context = context_acquire(device, NULL);
1137     if (!context->valid)
1138     {
1139         context_release(context);
1140         WARN("Invalid context, skipping blit.\n");
1141         return;
1142     }
1143
1144     gl_info = context->gl_info;
1145
1146     ENTER_GL();
1147
1148     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1149     glReadBuffer(GL_NONE);
1150     checkGLcall("glReadBuffer()");
1151     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1152
1153     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1154     context_set_draw_buffer(context, GL_NONE);
1155     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1156
1157     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1158     {
1159         glDepthMask(GL_TRUE);
1160         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1161     }
1162     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1163     {
1164         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1165         {
1166             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1167             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1168         }
1169         glStencilMask(~0U);
1170         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1171     }
1172
1173     glDisable(GL_SCISSOR_TEST);
1174     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1175
1176     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1177             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1178     checkGLcall("glBlitFramebuffer()");
1179
1180     LEAVE_GL();
1181
1182     if (wined3d_settings.strict_draw_ordering)
1183         wglFlush(); /* Flush to ensure ordering across contexts. */
1184
1185     context_release(context);
1186 }
1187
1188 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1189  * Depth / stencil is not supported. */
1190 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1191         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1192         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1193 {
1194     const struct wined3d_gl_info *gl_info;
1195     struct wined3d_context *context;
1196     RECT src_rect, dst_rect;
1197     GLenum gl_filter;
1198     GLenum buffer;
1199
1200     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1201     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1202             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1203     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1204             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1205
1206     src_rect = *src_rect_in;
1207     dst_rect = *dst_rect_in;
1208
1209     switch (filter)
1210     {
1211         case WINED3DTEXF_LINEAR:
1212             gl_filter = GL_LINEAR;
1213             break;
1214
1215         default:
1216             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1217         case WINED3DTEXF_NONE:
1218         case WINED3DTEXF_POINT:
1219             gl_filter = GL_NEAREST;
1220             break;
1221     }
1222
1223     /* Resolve the source surface first if needed. */
1224     if (src_location == SFLAG_INRB_MULTISAMPLE
1225             && (src_surface->resource.format->id != dst_surface->resource.format->id
1226                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1227                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1228         src_location = SFLAG_INRB_RESOLVED;
1229
1230     /* Make sure the locations are up-to-date. Loading the destination
1231      * surface isn't required if the entire surface is overwritten. (And is
1232      * in fact harmful if we're being called by surface_load_location() with
1233      * the purpose of loading the destination surface.) */
1234     surface_load_location(src_surface, src_location, NULL);
1235     if (!surface_is_full_rect(dst_surface, &dst_rect))
1236         surface_load_location(dst_surface, dst_location, NULL);
1237
1238     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1239     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1240     else context = context_acquire(device, NULL);
1241
1242     if (!context->valid)
1243     {
1244         context_release(context);
1245         WARN("Invalid context, skipping blit.\n");
1246         return;
1247     }
1248
1249     gl_info = context->gl_info;
1250
1251     if (src_location == SFLAG_INDRAWABLE)
1252     {
1253         TRACE("Source surface %p is onscreen.\n", src_surface);
1254         buffer = surface_get_gl_buffer(src_surface);
1255         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1256     }
1257     else
1258     {
1259         TRACE("Source surface %p is offscreen.\n", src_surface);
1260         buffer = GL_COLOR_ATTACHMENT0;
1261     }
1262
1263     ENTER_GL();
1264     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1265     glReadBuffer(buffer);
1266     checkGLcall("glReadBuffer()");
1267     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1268     LEAVE_GL();
1269
1270     if (dst_location == SFLAG_INDRAWABLE)
1271     {
1272         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1273         buffer = surface_get_gl_buffer(dst_surface);
1274         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1275     }
1276     else
1277     {
1278         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1279         buffer = GL_COLOR_ATTACHMENT0;
1280     }
1281
1282     ENTER_GL();
1283     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1284     context_set_draw_buffer(context, buffer);
1285     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1286     context_invalidate_state(context, STATE_FRAMEBUFFER);
1287
1288     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1289     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1290     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1291     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1292     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1293
1294     glDisable(GL_SCISSOR_TEST);
1295     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1296
1297     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1298             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1299     checkGLcall("glBlitFramebuffer()");
1300
1301     LEAVE_GL();
1302
1303     if (wined3d_settings.strict_draw_ordering
1304             || (dst_location == SFLAG_INDRAWABLE
1305             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1306         wglFlush();
1307
1308     context_release(context);
1309 }
1310
1311 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1312         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1313         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1314 {
1315     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1316         return FALSE;
1317
1318     /* Source and/or destination need to be on the GL side */
1319     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1320         return FALSE;
1321
1322     switch (blit_op)
1323     {
1324         case WINED3D_BLIT_OP_COLOR_BLIT:
1325             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1326                 return FALSE;
1327             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1328                 return FALSE;
1329             break;
1330
1331         case WINED3D_BLIT_OP_DEPTH_BLIT:
1332             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1333                 return FALSE;
1334             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1335                 return FALSE;
1336             break;
1337
1338         default:
1339             return FALSE;
1340     }
1341
1342     if (!(src_format->id == dst_format->id
1343             || (is_identity_fixup(src_format->color_fixup)
1344             && is_identity_fixup(dst_format->color_fixup))))
1345         return FALSE;
1346
1347     return TRUE;
1348 }
1349
1350 /* This function checks if the primary render target uses the 8bit paletted format. */
1351 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1352 {
1353     if (device->fb.render_targets && device->fb.render_targets[0])
1354     {
1355         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1356         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1357                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1358             return TRUE;
1359     }
1360     return FALSE;
1361 }
1362
1363 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1364         DWORD color, WINED3DCOLORVALUE *float_color)
1365 {
1366     const struct wined3d_format *format = surface->resource.format;
1367     const struct wined3d_device *device = surface->resource.device;
1368
1369     switch (format->id)
1370     {
1371         case WINED3DFMT_P8_UINT:
1372             if (surface->palette)
1373             {
1374                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1375                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1376                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1377             }
1378             else
1379             {
1380                 float_color->r = 0.0f;
1381                 float_color->g = 0.0f;
1382                 float_color->b = 0.0f;
1383             }
1384             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1385             break;
1386
1387         case WINED3DFMT_B5G6R5_UNORM:
1388             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1389             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1390             float_color->b = (color & 0x1f) / 31.0f;
1391             float_color->a = 1.0f;
1392             break;
1393
1394         case WINED3DFMT_B8G8R8_UNORM:
1395         case WINED3DFMT_B8G8R8X8_UNORM:
1396             float_color->r = D3DCOLOR_R(color);
1397             float_color->g = D3DCOLOR_G(color);
1398             float_color->b = D3DCOLOR_B(color);
1399             float_color->a = 1.0f;
1400             break;
1401
1402         case WINED3DFMT_B8G8R8A8_UNORM:
1403             float_color->r = D3DCOLOR_R(color);
1404             float_color->g = D3DCOLOR_G(color);
1405             float_color->b = D3DCOLOR_B(color);
1406             float_color->a = D3DCOLOR_A(color);
1407             break;
1408
1409         default:
1410             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1411             return FALSE;
1412     }
1413
1414     return TRUE;
1415 }
1416
1417 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1418 {
1419     const struct wined3d_format *format = surface->resource.format;
1420
1421     switch (format->id)
1422     {
1423         case WINED3DFMT_S1_UINT_D15_UNORM:
1424             *float_depth = depth / (float)0x00007fff;
1425             break;
1426
1427         case WINED3DFMT_D16_UNORM:
1428             *float_depth = depth / (float)0x0000ffff;
1429             break;
1430
1431         case WINED3DFMT_D24_UNORM_S8_UINT:
1432         case WINED3DFMT_X8D24_UNORM:
1433             *float_depth = depth / (float)0x00ffffff;
1434             break;
1435
1436         case WINED3DFMT_D32_UNORM:
1437             *float_depth = depth / (float)0xffffffff;
1438             break;
1439
1440         default:
1441             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1442             return FALSE;
1443     }
1444
1445     return TRUE;
1446 }
1447
1448 /* Do not call while under the GL lock. */
1449 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1450 {
1451     const struct wined3d_resource *resource = &surface->resource;
1452     struct wined3d_device *device = resource->device;
1453     const struct blit_shader *blitter;
1454
1455     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1456             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1457     if (!blitter)
1458     {
1459         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1460         return WINED3DERR_INVALIDCALL;
1461     }
1462
1463     return blitter->depth_fill(device, surface, rect, depth);
1464 }
1465
1466 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1467         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1468 {
1469     struct wined3d_device *device = src_surface->resource.device;
1470
1471     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1472             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1473             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1474         return WINED3DERR_INVALIDCALL;
1475
1476     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1477
1478     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1479             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1480     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1481
1482     return WINED3D_OK;
1483 }
1484
1485 /* Do not call while under the GL lock. */
1486 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1487         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1488         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1489 {
1490     const struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1491     struct wined3d_device *device = dst_surface->resource.device;
1492     DWORD src_ds_flags, dst_ds_flags;
1493     RECT src_rect, dst_rect;
1494
1495     static const DWORD simple_blit = WINEDDBLT_ASYNC
1496             | WINEDDBLT_COLORFILL
1497             | WINEDDBLT_WAIT
1498             | WINEDDBLT_DEPTHFILL
1499             | WINEDDBLT_DONOTWAIT;
1500
1501     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1502             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1503             flags, fx, debug_d3dtexturefiltertype(filter));
1504     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1505
1506     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1507     {
1508         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1509         return WINEDDERR_SURFACEBUSY;
1510     }
1511
1512     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1513
1514     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1515             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1516             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1517             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1518             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1519     {
1520         /* The destination rect can be out of bounds on the condition
1521          * that a clipper is set for the surface. */
1522         if (dst_surface->clipper)
1523             FIXME("Blit clipping not implemented.\n");
1524         else
1525             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1526         return WINEDDERR_INVALIDRECT;
1527     }
1528
1529     if (src_surface)
1530     {
1531         surface_get_rect(src_surface, src_rect_in, &src_rect);
1532
1533         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1534                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1535                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1536                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1537                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1538         {
1539             WARN("Application gave us bad source rectangle for Blt.\n");
1540             return WINEDDERR_INVALIDRECT;
1541         }
1542     }
1543     else
1544     {
1545         memset(&src_rect, 0, sizeof(src_rect));
1546     }
1547
1548     if (!fx || !(fx->dwDDFX))
1549         flags &= ~WINEDDBLT_DDFX;
1550
1551     if (flags & WINEDDBLT_WAIT)
1552         flags &= ~WINEDDBLT_WAIT;
1553
1554     if (flags & WINEDDBLT_ASYNC)
1555     {
1556         static unsigned int once;
1557
1558         if (!once++)
1559             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1560         flags &= ~WINEDDBLT_ASYNC;
1561     }
1562
1563     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1564     if (flags & WINEDDBLT_DONOTWAIT)
1565     {
1566         static unsigned int once;
1567
1568         if (!once++)
1569             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1570         flags &= ~WINEDDBLT_DONOTWAIT;
1571     }
1572
1573     if (!device->d3d_initialized)
1574     {
1575         WARN("D3D not initialized, using fallback.\n");
1576         goto cpu;
1577     }
1578
1579     if (flags & ~simple_blit)
1580     {
1581         WARN("Using fallback for complex blit (%#x).\n", flags);
1582         goto fallback;
1583     }
1584
1585     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1586         src_swapchain = src_surface->container.u.swapchain;
1587     else
1588         src_swapchain = NULL;
1589
1590     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1591         dst_swapchain = dst_surface->container.u.swapchain;
1592     else
1593         dst_swapchain = NULL;
1594
1595     /* This isn't strictly needed. FBO blits for example could deal with
1596      * cross-swapchain blits by first downloading the source to a texture
1597      * before switching to the destination context. We just have this here to
1598      * not have to deal with the issue, since cross-swapchain blits should be
1599      * rare. */
1600     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1601     {
1602         FIXME("Using fallback for cross-swapchain blit.\n");
1603         goto fallback;
1604     }
1605
1606     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1607     if (src_surface)
1608         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1609     else
1610         src_ds_flags = 0;
1611
1612     if (src_ds_flags || dst_ds_flags)
1613     {
1614         if (flags & WINEDDBLT_DEPTHFILL)
1615         {
1616             float depth;
1617
1618             TRACE("Depth fill.\n");
1619
1620             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1621                 return WINED3DERR_INVALIDCALL;
1622
1623             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1624                 return WINED3D_OK;
1625         }
1626         else
1627         {
1628             /* Accessing depth / stencil surfaces is supposed to fail while in
1629              * a scene, except for fills, which seem to work. */
1630             if (device->inScene)
1631             {
1632                 WARN("Rejecting depth / stencil access while in scene.\n");
1633                 return WINED3DERR_INVALIDCALL;
1634             }
1635
1636             if (src_ds_flags != dst_ds_flags)
1637             {
1638                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1639                 return WINED3DERR_INVALIDCALL;
1640             }
1641
1642             if (src_rect.top || src_rect.left
1643                     || src_rect.bottom != src_surface->resource.height
1644                     || src_rect.right != src_surface->resource.width)
1645             {
1646                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1647                         wine_dbgstr_rect(&src_rect));
1648                 return WINED3DERR_INVALIDCALL;
1649             }
1650
1651             if (dst_rect.top || dst_rect.left
1652                     || dst_rect.bottom != dst_surface->resource.height
1653                     || dst_rect.right != dst_surface->resource.width)
1654             {
1655                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1656                         wine_dbgstr_rect(&src_rect));
1657                 return WINED3DERR_INVALIDCALL;
1658             }
1659
1660             if (src_surface->resource.height != dst_surface->resource.height
1661                     || src_surface->resource.width != dst_surface->resource.width)
1662             {
1663                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1664                 return WINED3DERR_INVALIDCALL;
1665             }
1666
1667             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1668                 return WINED3D_OK;
1669         }
1670     }
1671     else
1672     {
1673         if (flags & WINEDDBLT_COLORFILL)
1674         {
1675             WINED3DCOLORVALUE color;
1676
1677             TRACE("Color fill.\n");
1678
1679             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1680                 goto fallback;
1681
1682             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1683                 return WINED3D_OK;
1684         }
1685         else
1686         {
1687             TRACE("Color blit.\n");
1688
1689             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1690                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1691                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1692             {
1693                 TRACE("Using FBO blit.\n");
1694
1695                 surface_blt_fbo(device, filter,
1696                         src_surface, src_surface->draw_binding, &src_rect,
1697                         dst_surface, dst_surface->draw_binding, &dst_rect);
1698                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1699                 return WINED3D_OK;
1700             }
1701
1702             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1703                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1704                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1705             {
1706                 TRACE("Using arbfp blit.\n");
1707
1708                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1709                     return WINED3D_OK;
1710             }
1711         }
1712     }
1713
1714 fallback:
1715
1716     /* Special cases for render targets. */
1717     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1718             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1719     {
1720         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1721                 src_surface, &src_rect, flags, fx, filter)))
1722             return WINED3D_OK;
1723     }
1724
1725 cpu:
1726
1727     /* For the rest call the X11 surface implementation. For render targets
1728      * this should be implemented OpenGL accelerated in BltOverride, other
1729      * blits are rather rare. */
1730     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1731 }
1732
1733 /* Do not call while under the GL lock. */
1734 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1735         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1736 {
1737     RECT src_rect, dst_rect;
1738     DWORD flags = 0;
1739
1740     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect_in %s, trans %#x.\n",
1741             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1742
1743     surface_get_rect(src_surface, src_rect_in, &src_rect);
1744
1745     dst_rect.left = dst_x;
1746     dst_rect.top = dst_y;
1747     dst_rect.right = dst_x + src_rect.right - src_rect.left;
1748     dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1749
1750     if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1751         flags |= WINEDDBLT_KEYSRC;
1752     if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1753         flags |= WINEDDBLT_KEYDEST;
1754     if (trans & WINEDDBLTFAST_WAIT)
1755         flags |= WINEDDBLT_WAIT;
1756     if (trans & WINEDDBLTFAST_DONOTWAIT)
1757         flags |= WINEDDBLT_DONOTWAIT;
1758
1759     return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT);
1760 }
1761
1762 static HRESULT surface_set_mem(struct wined3d_surface *surface, void *mem)
1763 {
1764     TRACE("surface %p, mem %p.\n", surface, mem);
1765
1766     if (mem && mem != surface->resource.allocatedMemory)
1767     {
1768         void *release = NULL;
1769
1770         /* Do I have to copy the old surface content? */
1771         if (surface->flags & SFLAG_DIBSECTION)
1772         {
1773             SelectObject(surface->hDC, surface->dib.holdbitmap);
1774             DeleteDC(surface->hDC);
1775             /* Release the DIB section. */
1776             DeleteObject(surface->dib.DIBsection);
1777             surface->dib.bitmap_data = NULL;
1778             surface->resource.allocatedMemory = NULL;
1779             surface->hDC = NULL;
1780             surface->flags &= ~SFLAG_DIBSECTION;
1781         }
1782         else if (!(surface->flags & SFLAG_USERPTR))
1783         {
1784             release = surface->resource.heapMemory;
1785             surface->resource.heapMemory = NULL;
1786         }
1787         surface->resource.allocatedMemory = mem;
1788         surface->flags |= SFLAG_USERPTR;
1789
1790         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
1791         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1792
1793         /* For client textures OpenGL has to be notified. */
1794         if (surface->flags & SFLAG_CLIENT)
1795             surface_release_client_storage(surface);
1796
1797         /* Now free the old memory if any. */
1798         HeapFree(GetProcessHeap(), 0, release);
1799     }
1800     else if (surface->flags & SFLAG_USERPTR)
1801     {
1802         /* HeapMemory should be NULL already. */
1803         if (surface->resource.heapMemory)
1804             ERR("User pointer surface has heap memory allocated.\n");
1805
1806         if (!mem)
1807         {
1808             surface->resource.allocatedMemory = NULL;
1809             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
1810
1811             if (surface->flags & SFLAG_CLIENT)
1812                 surface_release_client_storage(surface);
1813
1814             surface_prepare_system_memory(surface);
1815         }
1816
1817         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1818     }
1819
1820     return WINED3D_OK;
1821 }
1822
1823 /* Context activation is done by the caller. */
1824 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1825 {
1826     if (!surface->resource.heapMemory)
1827     {
1828         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1829         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1830                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1831     }
1832
1833     ENTER_GL();
1834     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1835     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1836     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1837             surface->resource.size, surface->resource.allocatedMemory));
1838     checkGLcall("glGetBufferSubDataARB");
1839     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1840     checkGLcall("glDeleteBuffersARB");
1841     LEAVE_GL();
1842
1843     surface->pbo = 0;
1844     surface->flags &= ~SFLAG_PBO;
1845 }
1846
1847 /* Do not call while under the GL lock. */
1848 static void surface_unload(struct wined3d_resource *resource)
1849 {
1850     struct wined3d_surface *surface = surface_from_resource(resource);
1851     struct wined3d_renderbuffer_entry *entry, *entry2;
1852     struct wined3d_device *device = resource->device;
1853     const struct wined3d_gl_info *gl_info;
1854     struct wined3d_context *context;
1855
1856     TRACE("surface %p.\n", surface);
1857
1858     if (resource->pool == WINED3DPOOL_DEFAULT)
1859     {
1860         /* Default pool resources are supposed to be destroyed before Reset is called.
1861          * Implicit resources stay however. So this means we have an implicit render target
1862          * or depth stencil. The content may be destroyed, but we still have to tear down
1863          * opengl resources, so we cannot leave early.
1864          *
1865          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1866          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1867          * or the depth stencil into an FBO the texture or render buffer will be removed
1868          * and all flags get lost
1869          */
1870         surface_init_sysmem(surface);
1871     }
1872     else
1873     {
1874         /* Load the surface into system memory */
1875         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1876         surface_modify_location(surface, surface->draw_binding, FALSE);
1877     }
1878     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1879     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1880     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1881
1882     context = context_acquire(device, NULL);
1883     gl_info = context->gl_info;
1884
1885     /* Destroy PBOs, but load them into real sysmem before */
1886     if (surface->flags & SFLAG_PBO)
1887         surface_remove_pbo(surface, gl_info);
1888
1889     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1890      * all application-created targets the application has to release the surface
1891      * before calling _Reset
1892      */
1893     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1894     {
1895         ENTER_GL();
1896         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1897         LEAVE_GL();
1898         list_remove(&entry->entry);
1899         HeapFree(GetProcessHeap(), 0, entry);
1900     }
1901     list_init(&surface->renderbuffers);
1902     surface->current_renderbuffer = NULL;
1903
1904     ENTER_GL();
1905
1906     /* If we're in a texture, the texture name belongs to the texture.
1907      * Otherwise, destroy it. */
1908     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1909     {
1910         glDeleteTextures(1, &surface->texture_name);
1911         surface->texture_name = 0;
1912         glDeleteTextures(1, &surface->texture_name_srgb);
1913         surface->texture_name_srgb = 0;
1914     }
1915     if (surface->rb_multisample)
1916     {
1917         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1918         surface->rb_multisample = 0;
1919     }
1920     if (surface->rb_resolved)
1921     {
1922         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1923         surface->rb_resolved = 0;
1924     }
1925
1926     LEAVE_GL();
1927
1928     context_release(context);
1929
1930     resource_unload(resource);
1931 }
1932
/* Resource-level callback table for surfaces. The initializer is positional;
 * its single entry is the unload callback, surface_unload(). */
1933 static const struct wined3d_resource_ops surface_resource_ops =
1934 {
1935     surface_unload,
1936 };
1937
/* Surface operations table for the non-GDI surface implementation
 * (presumably the OpenGL-backed one - confirm where the table is assigned).
 * The initializer is positional, so the order of the entries has to match
 * the member order of struct wined3d_surface_ops, which is declared outside
 * this file section. gdi_surface_ops lists the GDI counterparts in the same
 * order. */
1938 static const struct wined3d_surface_ops surface_ops =
1939 {
1940     surface_private_setup,
1941     surface_cleanup,
1942     surface_realize_palette,
1943     surface_draw_overlay,
1944     surface_preload,
1945     surface_map,
1946     surface_unmap,
1947     surface_getdc,
1948     surface_flip,
1949     surface_set_mem,
1950 };
1951
1952 /*****************************************************************************
1953  * Initializes the GDI surface, aka creates the DIB section we render to
1954  * The DIB section creation is done by calling GetDC, which will create the
1955  * section and releasing the dc to allow the app to use it. The dib section
1956  * will stay until the surface is released
1957  *
1958  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1959  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1960  * avoid confusion in the shared surface code.
1961  *
1962  * Returns:
1963  *  WINED3D_OK on success
1964  *  The return values of called methods on failure
1965  *
1966  *****************************************************************************/
1967 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1968 {
1969     HRESULT hr;
1970
1971     TRACE("surface %p.\n", surface);
1972
1973     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1974     {
1975         ERR("Overlays not yet supported by GDI surfaces.\n");
1976         return WINED3DERR_INVALIDCALL;
1977     }
1978
1979     /* Sysmem textures have memory already allocated - release it,
1980      * this avoids an unnecessary memcpy. */
1981     hr = surface_create_dib_section(surface);
1982     if (SUCCEEDED(hr))
1983     {
1984         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1985         surface->resource.heapMemory = NULL;
1986         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1987     }
1988
1989     /* We don't mind the nonpow2 stuff in GDI. */
1990     surface->pow2Width = surface->resource.width;
1991     surface->pow2Height = surface->resource.height;
1992
1993     return WINED3D_OK;
1994 }
1995
1996 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1997 {
1998     TRACE("surface %p.\n", surface);
1999
2000     if (surface->flags & SFLAG_DIBSECTION)
2001     {
2002         /* Release the DC. */
2003         SelectObject(surface->hDC, surface->dib.holdbitmap);
2004         DeleteDC(surface->hDC);
2005         /* Release the DIB section. */
2006         DeleteObject(surface->dib.DIBsection);
2007         surface->dib.bitmap_data = NULL;
2008         surface->resource.allocatedMemory = NULL;
2009     }
2010
2011     if (surface->flags & SFLAG_USERPTR)
2012         wined3d_surface_set_mem(surface, NULL);
2013     if (surface->overlay_dest)
2014         list_remove(&surface->overlay_entry);
2015
2016     HeapFree(GetProcessHeap(), 0, surface->palette9);
2017
2018     resource_cleanup(&surface->resource);
2019 }
2020
2021 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
2022 {
2023     struct wined3d_palette *palette = surface->palette;
2024
2025     TRACE("surface %p.\n", surface);
2026
2027     if (!palette) return;
2028
2029     if (surface->flags & SFLAG_DIBSECTION)
2030     {
2031         RGBQUAD col[256];
2032         unsigned int i;
2033
2034         TRACE("Updating the DC's palette.\n");
2035
2036         for (i = 0; i < 256; ++i)
2037         {
2038             col[i].rgbRed = palette->palents[i].peRed;
2039             col[i].rgbGreen = palette->palents[i].peGreen;
2040             col[i].rgbBlue = palette->palents[i].peBlue;
2041             col[i].rgbReserved = 0;
2042         }
2043         SetDIBColorTable(surface->hDC, 0, 256, col);
2044     }
2045
2046     /* Update the image because of the palette change. Some games like e.g.
2047      * Red Alert call SetEntries a lot to implement fading. */
2048     /* Tell the swapchain to update the screen. */
2049     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2050     {
2051         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2052         if (surface == swapchain->front_buffer)
2053         {
2054             x11_copy_to_screen(swapchain, NULL);
2055         }
2056     }
2057 }
2058
2059 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
2060 {
2061     FIXME("GDI surfaces can't draw overlays yet.\n");
2062     return E_FAIL;
2063 }
2064
/* Preload callback for GDI surfaces. Preloading is not supported for them,
 * so apart from tracing the call this only reports an error. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);

    /* Getting here is considered a caller error; nothing is loaded. */
    ERR("Preloading GDI surfaces is not supported.\n");
}
2071
2072 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2073 {
2074     TRACE("surface %p, rect %s, flags %#x.\n",
2075             surface, wine_dbgstr_rect(rect), flags);
2076
2077     if (!surface->resource.allocatedMemory)
2078     {
2079         /* This happens on gdi surfaces if the application set a user pointer
2080          * and resets it. Recreate the DIB section. */
2081         surface_create_dib_section(surface);
2082         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2083     }
2084 }
2085
2086 static void gdi_surface_unmap(struct wined3d_surface *surface)
2087 {
2088     TRACE("surface %p.\n", surface);
2089
2090     /* Tell the swapchain to update the screen. */
2091     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2092     {
2093         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2094         if (surface == swapchain->front_buffer)
2095         {
2096             x11_copy_to_screen(swapchain, &surface->lockedRect);
2097         }
2098     }
2099
2100     memset(&surface->lockedRect, 0, sizeof(RECT));
2101 }
2102
2103 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
2104 {
2105     WINED3DLOCKED_RECT lock;
2106     HRESULT hr;
2107
2108     TRACE("surface %p.\n", surface);
2109
2110     /* Should have a DIB section already. */
2111     if (!(surface->flags & SFLAG_DIBSECTION))
2112     {
2113         WARN("DC not supported on this surface\n");
2114         return WINED3DERR_INVALIDCALL;
2115     }
2116
2117     /* Map the surface. */
2118     hr = wined3d_surface_map(surface, &lock, NULL, 0);
2119     if (FAILED(hr))
2120         ERR("Map failed, hr %#x.\n", hr);
2121
2122     return hr;
2123 }
2124
2125 static HRESULT gdi_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
2126 {
2127     TRACE("surface %p, override %p.\n", surface, override);
2128
2129     return WINED3D_OK;
2130 }
2131
2132 static HRESULT gdi_surface_set_mem(struct wined3d_surface *surface, void *mem)
2133 {
2134     TRACE("surface %p, mem %p.\n", surface, mem);
2135
2136     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
2137     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2138     {
2139         ERR("Not supported on render targets.\n");
2140         return WINED3DERR_INVALIDCALL;
2141     }
2142
2143     if (mem && mem != surface->resource.allocatedMemory)
2144     {
2145         void *release = NULL;
2146
2147         /* Do I have to copy the old surface content? */
2148         if (surface->flags & SFLAG_DIBSECTION)
2149         {
2150             SelectObject(surface->hDC, surface->dib.holdbitmap);
2151             DeleteDC(surface->hDC);
2152             /* Release the DIB section. */
2153             DeleteObject(surface->dib.DIBsection);
2154             surface->dib.bitmap_data = NULL;
2155             surface->resource.allocatedMemory = NULL;
2156             surface->hDC = NULL;
2157             surface->flags &= ~SFLAG_DIBSECTION;
2158         }
2159         else if (!(surface->flags & SFLAG_USERPTR))
2160         {
2161             release = surface->resource.allocatedMemory;
2162         }
2163         surface->resource.allocatedMemory = mem;
2164         surface->flags |= SFLAG_USERPTR | SFLAG_INSYSMEM;
2165
2166         /* Now free the old memory, if any. */
2167         HeapFree(GetProcessHeap(), 0, release);
2168     }
2169     else if (surface->flags & SFLAG_USERPTR)
2170     {
2171         /* Map() and GetDC() will re-create the dib section and allocated memory. */
2172         surface->resource.allocatedMemory = NULL;
2173         surface->flags &= ~SFLAG_USERPTR;
2174     }
2175
2176     return WINED3D_OK;
2177 }
2178
/* Surface operations table for GDI surfaces. The initializer is positional,
 * so the order of the entries has to match the member order of
 * struct wined3d_surface_ops (declared outside this file section); it
 * mirrors the order used by surface_ops. */
2179 static const struct wined3d_surface_ops gdi_surface_ops =
2180 {
2181     gdi_surface_private_setup,
2182     surface_gdi_cleanup,
2183     gdi_surface_realize_palette,
2184     gdi_surface_draw_overlay,
2185     gdi_surface_preload,
2186     gdi_surface_map,
2187     gdi_surface_unmap,
2188     gdi_surface_getdc,
2189     gdi_surface_flip,
2190     gdi_surface_set_mem,
2191 };
2192
2193 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2194 {
2195     GLuint *name;
2196     DWORD flag;
2197
2198     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2199
2200     if(srgb)
2201     {
2202         name = &surface->texture_name_srgb;
2203         flag = SFLAG_INSRGBTEX;
2204     }
2205     else
2206     {
2207         name = &surface->texture_name;
2208         flag = SFLAG_INTEXTURE;
2209     }
2210
2211     if (!*name && new_name)
2212     {
2213         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2214          * surface has no texture name yet. See if we can get rid of this. */
2215         if (surface->flags & flag)
2216             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2217         surface_modify_location(surface, flag, FALSE);
2218     }
2219
2220     *name = new_name;
2221     surface_force_reload(surface);
2222 }
2223
2224 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2225 {
2226     TRACE("surface %p, target %#x.\n", surface, target);
2227
2228     if (surface->texture_target != target)
2229     {
2230         if (target == GL_TEXTURE_RECTANGLE_ARB)
2231         {
2232             surface->flags &= ~SFLAG_NORMCOORD;
2233         }
2234         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2235         {
2236             surface->flags |= SFLAG_NORMCOORD;
2237         }
2238     }
2239     surface->texture_target = target;
2240     surface_force_reload(surface);
2241 }
2242
2243 /* Context activation is done by the caller. */
2244 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2245 {
2246     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2247
2248     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2249     {
2250         struct wined3d_texture *texture = surface->container.u.texture;
2251
2252         TRACE("Passing to container (%p).\n", texture);
2253         texture->texture_ops->texture_bind(texture, context, srgb);
2254     }
2255     else
2256     {
2257         if (surface->texture_level)
2258         {
2259             ERR("Standalone surface %p is non-zero texture level %u.\n",
2260                     surface, surface->texture_level);
2261         }
2262
2263         if (srgb)
2264             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2265
2266         ENTER_GL();
2267
2268         if (!surface->texture_name)
2269         {
2270             glGenTextures(1, &surface->texture_name);
2271             checkGLcall("glGenTextures");
2272
2273             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2274
2275             context_bind_texture(context, surface->texture_target, surface->texture_name);
2276             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2277             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2278             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2279             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2280             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2281             checkGLcall("glTexParameteri");
2282         }
2283         else
2284         {
2285             context_bind_texture(context, surface->texture_target, surface->texture_name);
2286         }
2287
2288         LEAVE_GL();
2289     }
2290 }
2291
2292 /* This call just downloads data, the caller is responsible for binding the
2293  * correct texture. */
2294 /* Context activation is done by the caller. */
2295 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2296 {
2297     const struct wined3d_format *format = surface->resource.format;
2298
2299     /* Only support read back of converted P8 surfaces. */
2300     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2301     {
2302         FIXME("Readback conversion not supported for format %s.\n", debug_d3dformat(format->id));
2303         return;
2304     }
2305
2306     ENTER_GL();
2307
2308     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2309     {
2310         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2311                 surface, surface->texture_level, format->glFormat, format->glType,
2312                 surface->resource.allocatedMemory);
2313
2314         if (surface->flags & SFLAG_PBO)
2315         {
2316             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2317             checkGLcall("glBindBufferARB");
2318             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2319             checkGLcall("glGetCompressedTexImageARB");
2320             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2321             checkGLcall("glBindBufferARB");
2322         }
2323         else
2324         {
2325             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2326                     surface->texture_level, surface->resource.allocatedMemory));
2327             checkGLcall("glGetCompressedTexImageARB");
2328         }
2329
2330         LEAVE_GL();
2331     }
2332     else
2333     {
2334         void *mem;
2335         GLenum gl_format = format->glFormat;
2336         GLenum gl_type = format->glType;
2337         int src_pitch = 0;
2338         int dst_pitch = 0;
2339
2340         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2341         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2342         {
2343             gl_format = GL_ALPHA;
2344             gl_type = GL_UNSIGNED_BYTE;
2345         }
2346
2347         if (surface->flags & SFLAG_NONPOW2)
2348         {
2349             unsigned char alignment = surface->resource.device->surface_alignment;
2350             src_pitch = format->byte_count * surface->pow2Width;
2351             dst_pitch = wined3d_surface_get_pitch(surface);
2352             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2353             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2354         }
2355         else
2356         {
2357             mem = surface->resource.allocatedMemory;
2358         }
2359
2360         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2361                 surface, surface->texture_level, gl_format, gl_type, mem);
2362
2363         if (surface->flags & SFLAG_PBO)
2364         {
2365             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2366             checkGLcall("glBindBufferARB");
2367
2368             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2369             checkGLcall("glGetTexImage");
2370
2371             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2372             checkGLcall("glBindBufferARB");
2373         }
2374         else
2375         {
2376             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2377             checkGLcall("glGetTexImage");
2378         }
2379         LEAVE_GL();
2380
2381         if (surface->flags & SFLAG_NONPOW2)
2382         {
2383             const BYTE *src_data;
2384             BYTE *dst_data;
2385             UINT y;
2386             /*
2387              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2388              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2389              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2390              *
2391              * We're doing this...
2392              *
2393              * instead of boxing the texture :
2394              * |<-texture width ->|  -->pow2width|   /\
2395              * |111111111111111111|              |   |
2396              * |222 Texture 222222| boxed empty  | texture height
2397              * |3333 Data 33333333|              |   |
2398              * |444444444444444444|              |   \/
2399              * -----------------------------------   |
2400              * |     boxed  empty | boxed empty  | pow2height
2401              * |                  |              |   \/
2402              * -----------------------------------
2403              *
2404              *
2405              * we're repacking the data to the expected texture width
2406              *
2407              * |<-texture width ->|  -->pow2width|   /\
2408              * |111111111111111111222222222222222|   |
2409              * |222333333333333333333444444444444| texture height
2410              * |444444                           |   |
2411              * |                                 |   \/
2412              * |                                 |   |
2413              * |            empty                | pow2height
2414              * |                                 |   \/
2415              * -----------------------------------
2416              *
2417              * == is the same as
2418              *
2419              * |<-texture width ->|    /\
2420              * |111111111111111111|
2421              * |222222222222222222|texture height
2422              * |333333333333333333|
2423              * |444444444444444444|    \/
2424              * --------------------
2425              *
2426              * this also means that any references to allocatedMemory should work with the data as if were a
2427              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2428              *
2429              * internally the texture is still stored in a boxed format so any references to textureName will
2430              * get a boxed texture with width pow2width and not a texture of width resource.width.
2431              *
2432              * Performance should not be an issue, because applications normally do not lock the surfaces when
2433              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2434              * and doesn't have to be re-read. */
2435             src_data = mem;
2436             dst_data = surface->resource.allocatedMemory;
2437             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2438             for (y = 1; y < surface->resource.height; ++y)
2439             {
2440                 /* skip the first row */
2441                 src_data += src_pitch;
2442                 dst_data += dst_pitch;
2443                 memcpy(dst_data, src_data, dst_pitch);
2444             }
2445
2446             HeapFree(GetProcessHeap(), 0, mem);
2447         }
2448     }
2449
2450     /* Surface has now been downloaded */
2451     surface->flags |= SFLAG_INSYSMEM;
2452 }
2453
2454 /* This call just uploads data, the caller is responsible for binding the
2455  * correct texture. */
2456 /* Context activation is done by the caller. */
2457 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2458         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2459         BOOL srgb, const struct wined3d_bo_address *data)
2460 {
2461     UINT update_w = src_rect->right - src_rect->left;
2462     UINT update_h = src_rect->bottom - src_rect->top;
2463
2464     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2465             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2466             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2467
2468     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2469         update_h *= format->heightscale;
2470
2471     ENTER_GL();
2472
2473     if (data->buffer_object)
2474     {
2475         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2476         checkGLcall("glBindBufferARB");
2477     }
2478
2479     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2480     {
2481         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2482         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2483         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2484         const BYTE *addr = data->addr;
2485         GLenum internal;
2486
2487         addr += (src_rect->top / format->block_height) * src_pitch;
2488         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2489
2490         if (srgb)
2491             internal = format->glGammaInternal;
2492         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2493             internal = format->rtInternal;
2494         else
2495             internal = format->glInternal;
2496
2497         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2498                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2499                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2500
2501         if (row_length == src_pitch)
2502         {
2503             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2504                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2505         }
2506         else
2507         {
2508             UINT row, y;
2509
2510             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2511              * can't use the unpack row length like below. */
2512             for (row = 0, y = dst_point->y; row < row_count; ++row)
2513             {
2514                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2515                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2516                 y += format->block_height;
2517                 addr += src_pitch;
2518             }
2519         }
2520         checkGLcall("glCompressedTexSubImage2DARB");
2521     }
2522     else
2523     {
2524         const BYTE *addr = data->addr;
2525
2526         addr += src_rect->top * src_w * format->byte_count;
2527         addr += src_rect->left * format->byte_count;
2528
2529         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2530                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2531                 update_w, update_h, format->glFormat, format->glType, addr);
2532
2533         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2534         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2535                 update_w, update_h, format->glFormat, format->glType, addr);
2536         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2537         checkGLcall("glTexSubImage2D");
2538     }
2539
2540     if (data->buffer_object)
2541     {
2542         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2543         checkGLcall("glBindBufferARB");
2544     }
2545
2546     LEAVE_GL();
2547
2548     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2549     {
2550         struct wined3d_device *device = surface->resource.device;
2551         unsigned int i;
2552
2553         for (i = 0; i < device->context_count; ++i)
2554         {
2555             context_surface_update(device->contexts[i], surface);
2556         }
2557     }
2558 }
2559
2560 /* This call just allocates the texture, the caller is responsible for binding
2561  * the correct texture. */
2562 /* Context activation is done by the caller. */
2563 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2564         const struct wined3d_format *format, BOOL srgb)
2565 {
2566     BOOL enable_client_storage = FALSE;
2567     GLsizei width = surface->pow2Width;
2568     GLsizei height = surface->pow2Height;
2569     const BYTE *mem = NULL;
2570     GLenum internal;
2571
2572     if (srgb)
2573     {
2574         internal = format->glGammaInternal;
2575     }
2576     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2577     {
2578         internal = format->rtInternal;
2579     }
2580     else
2581     {
2582         internal = format->glInternal;
2583     }
2584
2585     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2586
2587     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2588             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2589             internal, width, height, format->glFormat, format->glType);
2590
2591     ENTER_GL();
2592
2593     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2594     {
2595         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2596                 || !surface->resource.allocatedMemory)
2597         {
2598             /* In some cases we want to disable client storage.
2599              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2600              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2601              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2602              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2603              */
2604             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2605             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2606             surface->flags &= ~SFLAG_CLIENT;
2607             enable_client_storage = TRUE;
2608         }
2609         else
2610         {
2611             surface->flags |= SFLAG_CLIENT;
2612
2613             /* Point OpenGL to our allocated texture memory. Do not use
2614              * resource.allocatedMemory here because it might point into a
2615              * PBO. Instead use heapMemory, but get the alignment right. */
2616             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2617                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2618         }
2619     }
2620
2621     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2622     {
2623         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2624                 internal, width, height, 0, surface->resource.size, mem));
2625         checkGLcall("glCompressedTexImage2DARB");
2626     }
2627     else
2628     {
2629         glTexImage2D(surface->texture_target, surface->texture_level,
2630                 internal, width, height, 0, format->glFormat, format->glType, mem);
2631         checkGLcall("glTexImage2D");
2632     }
2633
2634     if(enable_client_storage) {
2635         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2636         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2637     }
2638     LEAVE_GL();
2639 }
2640
2641 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2642  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2643 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2644 /* GL locking is done by the caller */
2645 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2646 {
2647     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2648     struct wined3d_renderbuffer_entry *entry;
2649     GLuint renderbuffer = 0;
2650     unsigned int src_width, src_height;
2651     unsigned int width, height;
2652
2653     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2654     {
2655         width = rt->pow2Width;
2656         height = rt->pow2Height;
2657     }
2658     else
2659     {
2660         width = surface->pow2Width;
2661         height = surface->pow2Height;
2662     }
2663
2664     src_width = surface->pow2Width;
2665     src_height = surface->pow2Height;
2666
2667     /* A depth stencil smaller than the render target is not valid */
2668     if (width > src_width || height > src_height) return;
2669
2670     /* Remove any renderbuffer set if the sizes match */
2671     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2672             || (width == src_width && height == src_height))
2673     {
2674         surface->current_renderbuffer = NULL;
2675         return;
2676     }
2677
2678     /* Look if we've already got a renderbuffer of the correct dimensions */
2679     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2680     {
2681         if (entry->width == width && entry->height == height)
2682         {
2683             renderbuffer = entry->id;
2684             surface->current_renderbuffer = entry;
2685             break;
2686         }
2687     }
2688
2689     if (!renderbuffer)
2690     {
2691         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2692         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2693         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2694                 surface->resource.format->glInternal, width, height);
2695
2696         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2697         entry->width = width;
2698         entry->height = height;
2699         entry->id = renderbuffer;
2700         list_add_head(&surface->renderbuffers, &entry->entry);
2701
2702         surface->current_renderbuffer = entry;
2703     }
2704
2705     checkGLcall("set_compatible_renderbuffer");
2706 }
2707
2708 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2709 {
2710     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2711
2712     TRACE("surface %p.\n", surface);
2713
2714     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2715     {
2716         ERR("Surface %p is not on a swapchain.\n", surface);
2717         return GL_NONE;
2718     }
2719
2720     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2721     {
2722         if (swapchain->render_to_fbo)
2723         {
2724             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2725             return GL_COLOR_ATTACHMENT0;
2726         }
2727         TRACE("Returning GL_BACK\n");
2728         return GL_BACK;
2729     }
2730     else if (surface == swapchain->front_buffer)
2731     {
2732         TRACE("Returning GL_FRONT\n");
2733         return GL_FRONT;
2734     }
2735
2736     FIXME("Higher back buffer, returning GL_BACK\n");
2737     return GL_BACK;
2738 }
2739
2740 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2741 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2742 {
2743     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2744
2745     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2746         /* No partial locking for textures yet. */
2747         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2748
2749     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2750     if (dirty_rect)
2751     {
2752         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2753         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2754         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2755         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2756     }
2757     else
2758     {
2759         surface->dirtyRect.left = 0;
2760         surface->dirtyRect.top = 0;
2761         surface->dirtyRect.right = surface->resource.width;
2762         surface->dirtyRect.bottom = surface->resource.height;
2763     }
2764
2765     /* if the container is a texture then mark it dirty. */
2766     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2767     {
2768         TRACE("Passing to container.\n");
2769         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2770     }
2771 }
2772
2773 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2774 {
2775     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2776     BOOL ck_changed;
2777
2778     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2779
2780     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2781     {
2782         ERR("Not supported on scratch surfaces.\n");
2783         return WINED3DERR_INVALIDCALL;
2784     }
2785
2786     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2787
2788     /* Reload if either the texture and sysmem have different ideas about the
2789      * color key, or the actual key values changed. */
2790     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2791             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2792             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2793     {
2794         TRACE("Reloading because of color keying\n");
2795         /* To perform the color key conversion we need a sysmem copy of
2796          * the surface. Make sure we have it. */
2797
2798         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2799         /* Make sure the texture is reloaded because of the color key change,
2800          * this kills performance though :( */
2801         /* TODO: This is not necessarily needed with hw palettized texture support. */
2802         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2803         /* Switching color keying on / off may change the internal format. */
2804         if (ck_changed)
2805             surface_force_reload(surface);
2806     }
2807     else if (!(surface->flags & flag))
2808     {
2809         TRACE("Reloading because surface is dirty.\n");
2810     }
2811     else
2812     {
2813         TRACE("surface is already in texture\n");
2814         return WINED3D_OK;
2815     }
2816
2817     /* No partial locking for textures yet. */
2818     surface_load_location(surface, flag, NULL);
2819     surface_evict_sysmem(surface);
2820
2821     return WINED3D_OK;
2822 }
2823
/* Convert a 32-bit float to the bit pattern of a 16-bit half float
 * (1 sign bit, 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * in: Pointer to the value to convert.
 *
 * Returns the half float encoding. Zero yields 0x0000, NaN yields 0x7c01,
 * and values too large for a half float yield infinity (0x7c00 plus sign).
 * Values too small for a normalized half float are encoded as denormals and
 * may end up as zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the magnitude into [2^10, 2^11), so that its integer part is the
     * 11 bit mantissa including the implicit leading one. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        --exp;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        ++exp;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 mantissa bits. Move the carry into
     * the exponent, masking the mantissa below would silently drop it. */
    if (mantissa == 0x800)
    {
        mantissa = 0x400;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2886
2887 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2888 {
2889     ULONG refcount;
2890
2891     TRACE("Surface %p, container %p of type %#x.\n",
2892             surface, surface->container.u.base, surface->container.type);
2893
2894     switch (surface->container.type)
2895     {
2896         case WINED3D_CONTAINER_TEXTURE:
2897             return wined3d_texture_incref(surface->container.u.texture);
2898
2899         case WINED3D_CONTAINER_SWAPCHAIN:
2900             return wined3d_swapchain_incref(surface->container.u.swapchain);
2901
2902         default:
2903             ERR("Unhandled container type %#x.\n", surface->container.type);
2904         case WINED3D_CONTAINER_NONE:
2905             break;
2906     }
2907
2908     refcount = InterlockedIncrement(&surface->resource.ref);
2909     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2910
2911     return refcount;
2912 }
2913
2914 /* Do not call while under the GL lock. */
2915 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2916 {
2917     ULONG refcount;
2918
2919     TRACE("Surface %p, container %p of type %#x.\n",
2920             surface, surface->container.u.base, surface->container.type);
2921
2922     switch (surface->container.type)
2923     {
2924         case WINED3D_CONTAINER_TEXTURE:
2925             return wined3d_texture_decref(surface->container.u.texture);
2926
2927         case WINED3D_CONTAINER_SWAPCHAIN:
2928             return wined3d_swapchain_decref(surface->container.u.swapchain);
2929
2930         default:
2931             ERR("Unhandled container type %#x.\n", surface->container.type);
2932         case WINED3D_CONTAINER_NONE:
2933             break;
2934     }
2935
2936     refcount = InterlockedDecrement(&surface->resource.ref);
2937     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2938
2939     if (!refcount)
2940     {
2941         surface->surface_ops->surface_cleanup(surface);
2942         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2943
2944         TRACE("Destroyed surface %p.\n", surface);
2945         HeapFree(GetProcessHeap(), 0, surface);
2946     }
2947
2948     return refcount;
2949 }
2950
2951 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2952 {
2953     return resource_set_priority(&surface->resource, priority);
2954 }
2955
2956 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2957 {
2958     return resource_get_priority(&surface->resource);
2959 }
2960
2961 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2962 {
2963     TRACE("surface %p.\n", surface);
2964
2965     surface->surface_ops->surface_preload(surface);
2966 }
2967
2968 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2969 {
2970     TRACE("surface %p.\n", surface);
2971
2972     return surface->resource.parent;
2973 }
2974
2975 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2976 {
2977     TRACE("surface %p.\n", surface);
2978
2979     return &surface->resource;
2980 }
2981
2982 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2983 {
2984     TRACE("surface %p, flags %#x.\n", surface, flags);
2985
2986     switch (flags)
2987     {
2988         case WINEDDGBS_CANBLT:
2989         case WINEDDGBS_ISBLTDONE:
2990             return WINED3D_OK;
2991
2992         default:
2993             return WINED3DERR_INVALIDCALL;
2994     }
2995 }
2996
2997 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2998 {
2999     TRACE("surface %p, flags %#x.\n", surface, flags);
3000
3001     /* XXX: DDERR_INVALIDSURFACETYPE */
3002
3003     switch (flags)
3004     {
3005         case WINEDDGFS_CANFLIP:
3006         case WINEDDGFS_ISFLIPDONE:
3007             return WINED3D_OK;
3008
3009         default:
3010             return WINED3DERR_INVALIDCALL;
3011     }
3012 }
3013
3014 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
3015 {
3016     TRACE("surface %p.\n", surface);
3017
3018     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
3019     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
3020 }
3021
3022 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
3023 {
3024     TRACE("surface %p.\n", surface);
3025
3026     /* So far we don't lose anything :) */
3027     surface->flags &= ~SFLAG_LOST;
3028     return WINED3D_OK;
3029 }
3030
3031 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
3032 {
3033     TRACE("surface %p, palette %p.\n", surface, palette);
3034
3035     if (surface->palette == palette)
3036     {
3037         TRACE("Nop palette change.\n");
3038         return WINED3D_OK;
3039     }
3040
3041     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3042         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3043
3044     surface->palette = palette;
3045
3046     if (palette)
3047     {
3048         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3049             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3050
3051         surface->surface_ops->surface_realize_palette(surface);
3052     }
3053
3054     return WINED3D_OK;
3055 }
3056
3057 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3058         DWORD flags, const WINEDDCOLORKEY *color_key)
3059 {
3060     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3061
3062     if (flags & WINEDDCKEY_COLORSPACE)
3063     {
3064         FIXME(" colorkey value not supported (%08x) !\n", flags);
3065         return WINED3DERR_INVALIDCALL;
3066     }
3067
3068     /* Dirtify the surface, but only if a key was changed. */
3069     if (color_key)
3070     {
3071         switch (flags & ~WINEDDCKEY_COLORSPACE)
3072         {
3073             case WINEDDCKEY_DESTBLT:
3074                 surface->DestBltCKey = *color_key;
3075                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3076                 break;
3077
3078             case WINEDDCKEY_DESTOVERLAY:
3079                 surface->DestOverlayCKey = *color_key;
3080                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3081                 break;
3082
3083             case WINEDDCKEY_SRCOVERLAY:
3084                 surface->SrcOverlayCKey = *color_key;
3085                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3086                 break;
3087
3088             case WINEDDCKEY_SRCBLT:
3089                 surface->SrcBltCKey = *color_key;
3090                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3091                 break;
3092         }
3093     }
3094     else
3095     {
3096         switch (flags & ~WINEDDCKEY_COLORSPACE)
3097         {
3098             case WINEDDCKEY_DESTBLT:
3099                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3100                 break;
3101
3102             case WINEDDCKEY_DESTOVERLAY:
3103                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3104                 break;
3105
3106             case WINEDDCKEY_SRCOVERLAY:
3107                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3108                 break;
3109
3110             case WINEDDCKEY_SRCBLT:
3111                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3112                 break;
3113         }
3114     }
3115
3116     return WINED3D_OK;
3117 }
3118
3119 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3120 {
3121     TRACE("surface %p.\n", surface);
3122
3123     return surface->palette;
3124 }
3125
3126 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3127 {
3128     const struct wined3d_format *format = surface->resource.format;
3129     DWORD pitch;
3130
3131     TRACE("surface %p.\n", surface);
3132
3133     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3134     {
3135         /* Since compressed formats are block based, pitch means the amount of
3136          * bytes to the next row of block rather than the next row of pixels. */
3137         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3138         pitch = row_block_count * format->block_byte_count;
3139     }
3140     else
3141     {
3142         unsigned char alignment = surface->resource.device->surface_alignment;
3143         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3144         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3145     }
3146
3147     TRACE("Returning %u.\n", pitch);
3148
3149     return pitch;
3150 }
3151
3152 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3153 {
3154     TRACE("surface %p, mem %p.\n", surface, mem);
3155
3156     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3157     {
3158         WARN("Surface is locked or the DC is in use.\n");
3159         return WINED3DERR_INVALIDCALL;
3160     }
3161
3162     return surface->surface_ops->surface_set_mem(surface, mem);
3163 }
3164
3165 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3166 {
3167     LONG w, h;
3168
3169     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3170
3171     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3172     {
3173         WARN("Not an overlay surface.\n");
3174         return WINEDDERR_NOTAOVERLAYSURFACE;
3175     }
3176
3177     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3178     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3179     surface->overlay_destrect.left = x;
3180     surface->overlay_destrect.top = y;
3181     surface->overlay_destrect.right = x + w;
3182     surface->overlay_destrect.bottom = y + h;
3183
3184     surface->surface_ops->surface_draw_overlay(surface);
3185
3186     return WINED3D_OK;
3187 }
3188
3189 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3190 {
3191     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3192
3193     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3194     {
3195         TRACE("Not an overlay surface.\n");
3196         return WINEDDERR_NOTAOVERLAYSURFACE;
3197     }
3198
3199     if (!surface->overlay_dest)
3200     {
3201         TRACE("Overlay not visible.\n");
3202         *x = 0;
3203         *y = 0;
3204         return WINEDDERR_OVERLAYNOTVISIBLE;
3205     }
3206
3207     *x = surface->overlay_destrect.left;
3208     *y = surface->overlay_destrect.top;
3209
3210     TRACE("Returning position %d, %d.\n", *x, *y);
3211
3212     return WINED3D_OK;
3213 }
3214
3215 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3216         DWORD flags, struct wined3d_surface *ref)
3217 {
3218     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3219
3220     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3221     {
3222         TRACE("Not an overlay surface.\n");
3223         return WINEDDERR_NOTAOVERLAYSURFACE;
3224     }
3225
3226     return WINED3D_OK;
3227 }
3228
3229 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3230         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3231 {
3232     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3233             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3234
3235     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3236     {
3237         WARN("Not an overlay surface.\n");
3238         return WINEDDERR_NOTAOVERLAYSURFACE;
3239     }
3240     else if (!dst_surface)
3241     {
3242         WARN("Dest surface is NULL.\n");
3243         return WINED3DERR_INVALIDCALL;
3244     }
3245
3246     if (src_rect)
3247     {
3248         surface->overlay_srcrect = *src_rect;
3249     }
3250     else
3251     {
3252         surface->overlay_srcrect.left = 0;
3253         surface->overlay_srcrect.top = 0;
3254         surface->overlay_srcrect.right = surface->resource.width;
3255         surface->overlay_srcrect.bottom = surface->resource.height;
3256     }
3257
3258     if (dst_rect)
3259     {
3260         surface->overlay_destrect = *dst_rect;
3261     }
3262     else
3263     {
3264         surface->overlay_destrect.left = 0;
3265         surface->overlay_destrect.top = 0;
3266         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3267         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3268     }
3269
3270     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3271     {
3272         list_remove(&surface->overlay_entry);
3273     }
3274
3275     if (flags & WINEDDOVER_SHOW)
3276     {
3277         if (surface->overlay_dest != dst_surface)
3278         {
3279             surface->overlay_dest = dst_surface;
3280             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3281         }
3282     }
3283     else if (flags & WINEDDOVER_HIDE)
3284     {
3285         /* tests show that the rectangles are erased on hide */
3286         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3287         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3288         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3289         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3290         surface->overlay_dest = NULL;
3291     }
3292
3293     surface->surface_ops->surface_draw_overlay(surface);
3294
3295     return WINED3D_OK;
3296 }
3297
3298 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3299 {
3300     TRACE("surface %p, clipper %p.\n", surface, clipper);
3301
3302     surface->clipper = clipper;
3303
3304     return WINED3D_OK;
3305 }
3306
3307 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3308 {
3309     TRACE("surface %p.\n", surface);
3310
3311     return surface->clipper;
3312 }
3313
3314 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3315 {
3316     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3317
3318     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3319
3320     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3321     {
3322         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3323         return WINED3DERR_INVALIDCALL;
3324     }
3325
3326     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3327             surface->pow2Width, surface->pow2Height);
3328     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3329     surface->resource.format = format;
3330
3331     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3332     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3333             format->glFormat, format->glInternal, format->glType);
3334
3335     return WINED3D_OK;
3336 }
3337
3338 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3339         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3340 {
3341     unsigned short *dst_s;
3342     const float *src_f;
3343     unsigned int x, y;
3344
3345     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3346
3347     for (y = 0; y < h; ++y)
3348     {
3349         src_f = (const float *)(src + y * pitch_in);
3350         dst_s = (unsigned short *) (dst + y * pitch_out);
3351         for (x = 0; x < w; ++x)
3352         {
3353             dst_s[x] = float_32_to_16(src_f + x);
3354         }
3355     }
3356 }
3357
3358 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3359         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3360 {
3361     static const unsigned char convert_5to8[] =
3362     {
3363         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3364         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3365         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3366         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3367     };
3368     static const unsigned char convert_6to8[] =
3369     {
3370         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3371         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3372         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3373         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3374         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3375         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3376         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3377         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3378     };
3379     unsigned int x, y;
3380
3381     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3382
3383     for (y = 0; y < h; ++y)
3384     {
3385         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3386         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3387         for (x = 0; x < w; ++x)
3388         {
3389             WORD pixel = src_line[x];
3390             dst_line[x] = 0xff000000
3391                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3392                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3393                     | convert_5to8[(pixel & 0x001f)];
3394         }
3395     }
3396 }
3397
3398 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3399  * in both cases we're just setting the X / Alpha channel to 0xff. */
3400 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3401         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3402 {
3403     unsigned int x, y;
3404
3405     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3406
3407     for (y = 0; y < h; ++y)
3408     {
3409         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3410         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3411
3412         for (x = 0; x < w; ++x)
3413         {
3414             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3415         }
3416     }
3417 }
3418
3419 static inline BYTE cliptobyte(int x)
3420 {
3421     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3422 }
3423
3424 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3425         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3426 {
3427     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3428     unsigned int x, y;
3429
3430     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3431
3432     for (y = 0; y < h; ++y)
3433     {
3434         const BYTE *src_line = src + y * pitch_in;
3435         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3436         for (x = 0; x < w; ++x)
3437         {
3438             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3439              *     C = Y - 16; D = U - 128; E = V - 128;
3440              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3441              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3442              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3443              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3444              * U and V are shared between the pixels. */
3445             if (!(x & 1)) /* For every even pixel, read new U and V. */
3446             {
3447                 d = (int) src_line[1] - 128;
3448                 e = (int) src_line[3] - 128;
3449                 r2 = 409 * e + 128;
3450                 g2 = - 100 * d - 208 * e + 128;
3451                 b2 = 516 * d + 128;
3452             }
3453             c2 = 298 * ((int) src_line[0] - 16);
3454             dst_line[x] = 0xff000000
3455                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3456                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3457                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3458                 /* Scale RGB values to 0..255 range,
3459                  * then clip them if still not in range (may be negative),
3460                  * then shift them within DWORD if necessary. */
3461             src_line += 2;
3462         }
3463     }
3464 }
3465
3466 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3467         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3468 {
3469     unsigned int x, y;
3470     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3471
3472     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3473
3474     for (y = 0; y < h; ++y)
3475     {
3476         const BYTE *src_line = src + y * pitch_in;
3477         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3478         for (x = 0; x < w; ++x)
3479         {
3480             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3481              *     C = Y - 16; D = U - 128; E = V - 128;
3482              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3483              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3484              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3485              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3486              * U and V are shared between the pixels. */
3487             if (!(x & 1)) /* For every even pixel, read new U and V. */
3488             {
3489                 d = (int) src_line[1] - 128;
3490                 e = (int) src_line[3] - 128;
3491                 r2 = 409 * e + 128;
3492                 g2 = - 100 * d - 208 * e + 128;
3493                 b2 = 516 * d + 128;
3494             }
3495             c2 = 298 * ((int) src_line[0] - 16);
3496             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3497                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3498                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3499                 /* Scale RGB values to 0..255 range,
3500                  * then clip them if still not in range (may be negative),
3501                  * then shift them within DWORD if necessary. */
3502             src_line += 2;
3503         }
3504     }
3505 }
3506
3507 struct d3dfmt_convertor_desc
3508 {
3509     enum wined3d_format_id from, to;
3510     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3511 };
3512
3513 static const struct d3dfmt_convertor_desc convertors[] =
3514 {
3515     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3516     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3517     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3518     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3519     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3520     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3521 };
3522
3523 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3524         enum wined3d_format_id to)
3525 {
3526     unsigned int i;
3527
3528     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3529     {
3530         if (convertors[i].from == from && convertors[i].to == to)
3531             return &convertors[i];
3532     }
3533
3534     return NULL;
3535 }
3536
3537 /*****************************************************************************
3538  * surface_convert_format
3539  *
3540  * Creates a duplicate of a surface in a different format. Is used by Blt to
3541  * blit between surfaces with different formats.
3542  *
3543  * Parameters
3544  *  source: Source surface
3545  *  fmt: Requested destination format
3546  *
3547  *****************************************************************************/
3548 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3549 {
3550     const struct d3dfmt_convertor_desc *conv;
3551     WINED3DLOCKED_RECT lock_src, lock_dst;
3552     struct wined3d_surface *ret = NULL;
3553     HRESULT hr;
3554
3555     conv = find_convertor(source->resource.format->id, to_fmt);
3556     if (!conv)
3557     {
3558         FIXME("Cannot find a conversion function from format %s to %s.\n",
3559                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3560         return NULL;
3561     }
3562
3563     wined3d_surface_create(source->resource.device, source->resource.width,
3564             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3565             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3566             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3567     if (!ret)
3568     {
3569         ERR("Failed to create a destination surface for conversion.\n");
3570         return NULL;
3571     }
3572
3573     memset(&lock_src, 0, sizeof(lock_src));
3574     memset(&lock_dst, 0, sizeof(lock_dst));
3575
3576     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3577     if (FAILED(hr))
3578     {
3579         ERR("Failed to lock the source surface.\n");
3580         wined3d_surface_decref(ret);
3581         return NULL;
3582     }
3583     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3584     if (FAILED(hr))
3585     {
3586         ERR("Failed to lock the destination surface.\n");
3587         wined3d_surface_unmap(source);
3588         wined3d_surface_decref(ret);
3589         return NULL;
3590     }
3591
3592     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3593             source->resource.width, source->resource.height);
3594
3595     wined3d_surface_unmap(ret);
3596     wined3d_surface_unmap(source);
3597
3598     return ret;
3599 }
3600
3601 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3602         unsigned int bpp, UINT pitch, DWORD color)
3603 {
3604     BYTE *first;
3605     int x, y;
3606
3607     /* Do first row */
3608
3609 #define COLORFILL_ROW(type) \
3610 do { \
3611     type *d = (type *)buf; \
3612     for (x = 0; x < width; ++x) \
3613         d[x] = (type)color; \
3614 } while(0)
3615
3616     switch (bpp)
3617     {
3618         case 1:
3619             COLORFILL_ROW(BYTE);
3620             break;
3621
3622         case 2:
3623             COLORFILL_ROW(WORD);
3624             break;
3625
3626         case 3:
3627         {
3628             BYTE *d = buf;
3629             for (x = 0; x < width; ++x, d += 3)
3630             {
3631                 d[0] = (color      ) & 0xFF;
3632                 d[1] = (color >>  8) & 0xFF;
3633                 d[2] = (color >> 16) & 0xFF;
3634             }
3635             break;
3636         }
3637         case 4:
3638             COLORFILL_ROW(DWORD);
3639             break;
3640
3641         default:
3642             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3643             return WINED3DERR_NOTAVAILABLE;
3644     }
3645
3646 #undef COLORFILL_ROW
3647
3648     /* Now copy first row. */
3649     first = buf;
3650     for (y = 1; y < height; ++y)
3651     {
3652         buf += pitch;
3653         memcpy(buf, first, width * bpp);
3654     }
3655
3656     return WINED3D_OK;
3657 }
3658
3659 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3660 {
3661     TRACE("surface %p.\n", surface);
3662
3663     if (!(surface->flags & SFLAG_LOCKED))
3664     {
3665         WARN("Trying to unmap unmapped surface.\n");
3666         return WINEDDERR_NOTLOCKED;
3667     }
3668     surface->flags &= ~SFLAG_LOCKED;
3669
3670     surface->surface_ops->surface_unmap(surface);
3671
3672     return WINED3D_OK;
3673 }
3674
3675 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3676         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3677 {
3678     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3679             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3680
3681     if (surface->flags & SFLAG_LOCKED)
3682     {
3683         WARN("Surface is already mapped.\n");
3684         return WINED3DERR_INVALIDCALL;
3685     }
3686     surface->flags |= SFLAG_LOCKED;
3687
3688     if (!(surface->flags & SFLAG_LOCKABLE))
3689         WARN("Trying to lock unlockable surface.\n");
3690
3691     surface->surface_ops->surface_map(surface, rect, flags);
3692
3693     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3694
3695     if (!rect)
3696     {
3697         locked_rect->pBits = surface->resource.allocatedMemory;
3698         surface->lockedRect.left = 0;
3699         surface->lockedRect.top = 0;
3700         surface->lockedRect.right = surface->resource.width;
3701         surface->lockedRect.bottom = surface->resource.height;
3702     }
3703     else
3704     {
3705         const struct wined3d_format *format = surface->resource.format;
3706
3707         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3708         {
3709             /* Compressed textures are block based, so calculate the offset of
3710              * the block that contains the top-left pixel of the locked rectangle. */
3711             locked_rect->pBits = surface->resource.allocatedMemory
3712                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3713                     + ((rect->left / format->block_width) * format->block_byte_count);
3714         }
3715         else
3716         {
3717             locked_rect->pBits = surface->resource.allocatedMemory
3718                     + (locked_rect->Pitch * rect->top)
3719                     + (rect->left * format->byte_count);
3720         }
3721         surface->lockedRect.left = rect->left;
3722         surface->lockedRect.top = rect->top;
3723         surface->lockedRect.right = rect->right;
3724         surface->lockedRect.bottom = rect->bottom;
3725     }
3726
3727     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3728     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3729
3730     return WINED3D_OK;
3731 }
3732
3733 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3734 {
3735     HRESULT hr;
3736
3737     TRACE("surface %p, dc %p.\n", surface, dc);
3738
3739     if (surface->flags & SFLAG_USERPTR)
3740     {
3741         ERR("Not supported on surfaces with application-provided memory.\n");
3742         return WINEDDERR_NODC;
3743     }
3744
3745     /* Give more detailed info for ddraw. */
3746     if (surface->flags & SFLAG_DCINUSE)
3747         return WINEDDERR_DCALREADYCREATED;
3748
3749     /* Can't GetDC if the surface is locked. */
3750     if (surface->flags & SFLAG_LOCKED)
3751         return WINED3DERR_INVALIDCALL;
3752
3753     hr = surface->surface_ops->surface_getdc(surface);
3754     if (FAILED(hr))
3755         return hr;
3756
3757     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3758             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3759     {
3760         /* GetDC on palettized formats is unsupported in D3D9, and the method
3761          * is missing in D3D8, so this should only be used for DX <=7
3762          * surfaces (with non-device palettes). */
3763         const PALETTEENTRY *pal = NULL;
3764
3765         if (surface->palette)
3766         {
3767             pal = surface->palette->palents;
3768         }
3769         else
3770         {
3771             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3772             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3773
3774             if (dds_primary && dds_primary->palette)
3775                 pal = dds_primary->palette->palents;
3776         }
3777
3778         if (pal)
3779         {
3780             RGBQUAD col[256];
3781             unsigned int i;
3782
3783             for (i = 0; i < 256; ++i)
3784             {
3785                 col[i].rgbRed = pal[i].peRed;
3786                 col[i].rgbGreen = pal[i].peGreen;
3787                 col[i].rgbBlue = pal[i].peBlue;
3788                 col[i].rgbReserved = 0;
3789             }
3790             SetDIBColorTable(surface->hDC, 0, 256, col);
3791         }
3792     }
3793
3794     surface->flags |= SFLAG_DCINUSE;
3795
3796     *dc = surface->hDC;
3797     TRACE("Returning dc %p.\n", *dc);
3798
3799     return WINED3D_OK;
3800 }
3801
3802 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3803 {
3804     TRACE("surface %p, dc %p.\n", surface, dc);
3805
3806     if (!(surface->flags & SFLAG_DCINUSE))
3807         return WINEDDERR_NODC;
3808
3809     if (surface->hDC != dc)
3810     {
3811         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3812                 dc, surface->hDC);
3813         return WINEDDERR_NODC;
3814     }
3815
3816     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3817     {
3818         /* Copy the contents of the DIB over to the PBO. */
3819         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
3820     }
3821
3822     /* We locked first, so unlock now. */
3823     wined3d_surface_unmap(surface);
3824
3825     surface->flags &= ~SFLAG_DCINUSE;
3826
3827     return WINED3D_OK;
3828 }
3829
3830 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3831 {
3832     struct wined3d_swapchain *swapchain;
3833     HRESULT hr;
3834
3835     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3836
3837     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
3838     {
3839         ERR("Flipped surface is not on a swapchain.\n");
3840         return WINEDDERR_NOTFLIPPABLE;
3841     }
3842     swapchain = surface->container.u.swapchain;
3843
3844     hr = surface->surface_ops->surface_flip(surface, override);
3845     if (FAILED(hr))
3846         return hr;
3847
3848     /* Just overwrite the swapchain presentation interval. This is ok because
3849      * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
3850      * specify the presentation interval. */
3851     if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
3852         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
3853     else if (flags & WINEDDFLIP_NOVSYNC)
3854         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
3855     else if (flags & WINEDDFLIP_INTERVAL2)
3856         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
3857     else if (flags & WINEDDFLIP_INTERVAL3)
3858         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
3859     else
3860         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
3861
3862     return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
3863 }
3864
3865 /* Do not call while under the GL lock. */
3866 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3867 {
3868     struct wined3d_device *device = surface->resource.device;
3869
3870     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3871
3872     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3873     {
3874         struct wined3d_texture *texture = surface->container.u.texture;
3875
3876         TRACE("Passing to container (%p).\n", texture);
3877         texture->texture_ops->texture_preload(texture, srgb);
3878     }
3879     else
3880     {
3881         struct wined3d_context *context;
3882
3883         TRACE("(%p) : About to load surface\n", surface);
3884
3885         /* TODO: Use already acquired context when possible. */
3886         context = context_acquire(device, NULL);
3887
3888         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3889                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3890         {
3891             if (palette9_changed(surface))
3892             {
3893                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3894                 /* TODO: This is not necessarily needed with hw palettized texture support */
3895                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3896                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3897                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3898             }
3899         }
3900
3901         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3902
3903         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3904         {
3905             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3906             GLclampf tmp;
3907             tmp = 0.9f;
3908             ENTER_GL();
3909             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3910             LEAVE_GL();
3911         }
3912
3913         context_release(context);
3914     }
3915 }
3916
3917 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3918 {
3919     if (!surface->resource.allocatedMemory)
3920     {
3921         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3922                 surface->resource.size + RESOURCE_ALIGNMENT);
3923         if (!surface->resource.heapMemory)
3924         {
3925             ERR("Out of memory\n");
3926             return FALSE;
3927         }
3928         surface->resource.allocatedMemory =
3929             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3930     }
3931     else
3932     {
3933         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3934     }
3935
3936     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3937
3938     return TRUE;
3939 }
3940
3941 /* Read the framebuffer back into the surface */
3942 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3943 {
3944     struct wined3d_device *device = surface->resource.device;
3945     const struct wined3d_gl_info *gl_info;
3946     struct wined3d_context *context;
3947     BYTE *mem;
3948     GLint fmt;
3949     GLint type;
3950     BYTE *row, *top, *bottom;
3951     int i;
3952     BOOL bpp;
3953     RECT local_rect;
3954     BOOL srcIsUpsideDown;
3955     GLint rowLen = 0;
3956     GLint skipPix = 0;
3957     GLint skipRow = 0;
3958
3959     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3960         static BOOL warned = FALSE;
3961         if(!warned) {
3962             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3963             warned = TRUE;
3964         }
3965         return;
3966     }
3967
3968     context = context_acquire(device, surface);
3969     context_apply_blit_state(context, device);
3970     gl_info = context->gl_info;
3971
3972     ENTER_GL();
3973
3974     /* Select the correct read buffer, and give some debug output.
3975      * There is no need to keep track of the current read buffer or reset it, every part of the code
3976      * that reads sets the read buffer as desired.
3977      */
3978     if (surface_is_offscreen(surface))
3979     {
3980         /* Mapping the primary render target which is not on a swapchain.
3981          * Read from the back buffer. */
3982         TRACE("Mapping offscreen render target.\n");
3983         glReadBuffer(device->offscreenBuffer);
3984         srcIsUpsideDown = TRUE;
3985     }
3986     else
3987     {
3988         /* Onscreen surfaces are always part of a swapchain */
3989         GLenum buffer = surface_get_gl_buffer(surface);
3990         TRACE("Mapping %#x buffer.\n", buffer);
3991         glReadBuffer(buffer);
3992         checkGLcall("glReadBuffer");
3993         srcIsUpsideDown = FALSE;
3994     }
3995
3996     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3997     if (!rect)
3998     {
3999         local_rect.left = 0;
4000         local_rect.top = 0;
4001         local_rect.right = surface->resource.width;
4002         local_rect.bottom = surface->resource.height;
4003     }
4004     else
4005     {
4006         local_rect = *rect;
4007     }
4008     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4009
4010     switch (surface->resource.format->id)
4011     {
4012         case WINED3DFMT_P8_UINT:
4013         {
4014             if (primary_render_target_is_p8(device))
4015             {
4016                 /* In case of P8 render targets the index is stored in the alpha component */
4017                 fmt = GL_ALPHA;
4018                 type = GL_UNSIGNED_BYTE;
4019                 mem = dest;
4020                 bpp = surface->resource.format->byte_count;
4021             }
4022             else
4023             {
4024                 /* GL can't return palettized data, so read ARGB pixels into a
4025                  * separate block of memory and convert them into palettized format
4026                  * in software. Slow, but if the app means to use palettized render
4027                  * targets and locks it...
4028                  *
4029                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4030                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4031                  * for the color channels when palettizing the colors.
4032                  */
4033                 fmt = GL_RGB;
4034                 type = GL_UNSIGNED_BYTE;
4035                 pitch *= 3;
4036                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4037                 if (!mem)
4038                 {
4039                     ERR("Out of memory\n");
4040                     LEAVE_GL();
4041                     return;
4042                 }
4043                 bpp = surface->resource.format->byte_count * 3;
4044             }
4045         }
4046         break;
4047
4048         default:
4049             mem = dest;
4050             fmt = surface->resource.format->glFormat;
4051             type = surface->resource.format->glType;
4052             bpp = surface->resource.format->byte_count;
4053     }
4054
4055     if (surface->flags & SFLAG_PBO)
4056     {
4057         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4058         checkGLcall("glBindBufferARB");
4059         if (mem)
4060         {
4061             ERR("mem not null for pbo -- unexpected\n");
4062             mem = NULL;
4063         }
4064     }
4065
4066     /* Save old pixel store pack state */
4067     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4068     checkGLcall("glGetIntegerv");
4069     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4070     checkGLcall("glGetIntegerv");
4071     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4072     checkGLcall("glGetIntegerv");
4073
4074     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4075     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4076     checkGLcall("glPixelStorei");
4077     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4078     checkGLcall("glPixelStorei");
4079     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4080     checkGLcall("glPixelStorei");
4081
4082     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4083             local_rect.right - local_rect.left,
4084             local_rect.bottom - local_rect.top,
4085             fmt, type, mem);
4086     checkGLcall("glReadPixels");
4087
4088     /* Reset previous pixel store pack state */
4089     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4090     checkGLcall("glPixelStorei");
4091     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4092     checkGLcall("glPixelStorei");
4093     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4094     checkGLcall("glPixelStorei");
4095
4096     if (surface->flags & SFLAG_PBO)
4097     {
4098         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4099         checkGLcall("glBindBufferARB");
4100
4101         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4102          * to get a pointer to it and perform the flipping in software. This is a lot
4103          * faster than calling glReadPixels for each line. In case we want more speed
4104          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4105         if (!srcIsUpsideDown)
4106         {
4107             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4108             checkGLcall("glBindBufferARB");
4109
4110             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4111             checkGLcall("glMapBufferARB");
4112         }
4113     }
4114
4115     /* TODO: Merge this with the palettization loop below for P8 targets */
4116     if(!srcIsUpsideDown) {
4117         UINT len, off;
4118         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4119             Flip the lines in software */
4120         len = (local_rect.right - local_rect.left) * bpp;
4121         off = local_rect.left * bpp;
4122
4123         row = HeapAlloc(GetProcessHeap(), 0, len);
4124         if(!row) {
4125             ERR("Out of memory\n");
4126             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4127                 HeapFree(GetProcessHeap(), 0, mem);
4128             LEAVE_GL();
4129             return;
4130         }
4131
4132         top = mem + pitch * local_rect.top;
4133         bottom = mem + pitch * (local_rect.bottom - 1);
4134         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4135             memcpy(row, top + off, len);
4136             memcpy(top + off, bottom + off, len);
4137             memcpy(bottom + off, row, len);
4138             top += pitch;
4139             bottom -= pitch;
4140         }
4141         HeapFree(GetProcessHeap(), 0, row);
4142
4143         /* Unmap the temp PBO buffer */
4144         if (surface->flags & SFLAG_PBO)
4145         {
4146             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4147             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4148         }
4149     }
4150
4151     LEAVE_GL();
4152     context_release(context);
4153
4154     /* For P8 textures we need to perform an inverse palette lookup. This is
4155      * done by searching for a palette index which matches the RGB value.
4156      * Note this isn't guaranteed to work when there are multiple entries for
4157      * the same color but we have no choice. In case of P8 render targets,
4158      * the index is stored in the alpha component so no conversion is needed. */
4159     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4160     {
4161         const PALETTEENTRY *pal = NULL;
4162         DWORD width = pitch / 3;
4163         int x, y, c;
4164
4165         if (surface->palette)
4166         {
4167             pal = surface->palette->palents;
4168         }
4169         else
4170         {
4171             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4172             HeapFree(GetProcessHeap(), 0, mem);
4173             return;
4174         }
4175
4176         for(y = local_rect.top; y < local_rect.bottom; y++) {
4177             for(x = local_rect.left; x < local_rect.right; x++) {
4178                 /*                      start              lines            pixels      */
4179                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4180                 const BYTE *green = blue  + 1;
4181                 const BYTE *red = green + 1;
4182
4183                 for(c = 0; c < 256; c++) {
4184                     if(*red   == pal[c].peRed   &&
4185                        *green == pal[c].peGreen &&
4186                        *blue  == pal[c].peBlue)
4187                     {
4188                         *((BYTE *) dest + y * width + x) = c;
4189                         break;
4190                     }
4191                 }
4192             }
4193         }
4194         HeapFree(GetProcessHeap(), 0, mem);
4195     }
4196 }
4197
4198 /* Read the framebuffer contents into a texture */
4199 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
4200 {
4201     struct wined3d_device *device = surface->resource.device;
4202     const struct wined3d_gl_info *gl_info;
4203     struct wined3d_context *context;
4204
4205     if (!surface_is_offscreen(surface))
4206     {
4207         /* We would need to flip onscreen surfaces, but there's no efficient
4208          * way to do that here. It makes more sense for the caller to
4209          * explicitly go through sysmem. */
4210         ERR("Not supported for onscreen targets.\n");
4211         return;
4212     }
4213
4214     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4215      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4216      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4217      */
4218     context = context_acquire(device, surface);
4219     gl_info = context->gl_info;
4220     device_invalidate_state(device, STATE_FRAMEBUFFER);
4221
4222     surface_prepare_texture(surface, context, srgb);
4223     surface_bind_and_dirtify(surface, context, srgb);
4224
4225     TRACE("Reading back offscreen render target %p.\n", surface);
4226
4227     ENTER_GL();
4228
4229     glReadBuffer(device->offscreenBuffer);
4230     checkGLcall("glReadBuffer");
4231
4232     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4233             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4234     checkGLcall("glCopyTexSubImage2D");
4235
4236     LEAVE_GL();
4237
4238     context_release(context);
4239 }
4240
4241 /* Context activation is done by the caller. */
4242 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4243         struct wined3d_context *context, BOOL srgb)
4244 {
4245     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4246     CONVERT_TYPES convert;
4247     struct wined3d_format format;
4248
4249     if (surface->flags & alloc_flag) return;
4250
4251     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4252     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4253     else surface->flags &= ~SFLAG_CONVERTED;
4254
4255     surface_bind_and_dirtify(surface, context, srgb);
4256     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4257     surface->flags |= alloc_flag;
4258 }
4259
4260 /* Context activation is done by the caller. */
4261 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4262 {
4263     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4264     {
4265         struct wined3d_texture *texture = surface->container.u.texture;
4266         UINT sub_count = texture->level_count * texture->layer_count;
4267         UINT i;
4268
4269         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4270
4271         for (i = 0; i < sub_count; ++i)
4272         {
4273             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4274             surface_prepare_texture_internal(s, context, srgb);
4275         }
4276
4277         return;
4278     }
4279
4280     surface_prepare_texture_internal(surface, context, srgb);
4281 }
4282
4283 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4284 {
4285     if (multisample)
4286     {
4287         if (surface->rb_multisample)
4288             return;
4289
4290         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4291         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4292         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4293                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4294         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4295     }
4296     else
4297     {
4298         if (surface->rb_resolved)
4299             return;
4300
4301         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4302         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4303         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4304                 surface->pow2Width, surface->pow2Height);
4305         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4306     }
4307 }
4308
4309 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4310         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4311 {
4312     struct wined3d_device *device = surface->resource.device;
4313     UINT pitch = wined3d_surface_get_pitch(surface);
4314     const struct wined3d_gl_info *gl_info;
4315     struct wined3d_context *context;
4316     RECT local_rect;
4317     UINT w, h;
4318
4319     surface_get_rect(surface, rect, &local_rect);
4320
4321     mem += local_rect.top * pitch + local_rect.left * bpp;
4322     w = local_rect.right - local_rect.left;
4323     h = local_rect.bottom - local_rect.top;
4324
4325     /* Activate the correct context for the render target */
4326     context = context_acquire(device, surface);
4327     context_apply_blit_state(context, device);
4328     gl_info = context->gl_info;
4329
4330     ENTER_GL();
4331
4332     if (!surface_is_offscreen(surface))
4333     {
4334         GLenum buffer = surface_get_gl_buffer(surface);
4335         TRACE("Unlocking %#x buffer.\n", buffer);
4336         context_set_draw_buffer(context, buffer);
4337
4338         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4339         glPixelZoom(1.0f, -1.0f);
4340     }
4341     else
4342     {
4343         /* Primary offscreen render target */
4344         TRACE("Offscreen render target.\n");
4345         context_set_draw_buffer(context, device->offscreenBuffer);
4346
4347         glPixelZoom(1.0f, 1.0f);
4348     }
4349
4350     glRasterPos3i(local_rect.left, local_rect.top, 1);
4351     checkGLcall("glRasterPos3i");
4352
4353     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4354     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4355
4356     if (surface->flags & SFLAG_PBO)
4357     {
4358         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4359         checkGLcall("glBindBufferARB");
4360     }
4361
4362     glDrawPixels(w, h, fmt, type, mem);
4363     checkGLcall("glDrawPixels");
4364
4365     if (surface->flags & SFLAG_PBO)
4366     {
4367         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4368         checkGLcall("glBindBufferARB");
4369     }
4370
4371     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4372     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4373
4374     LEAVE_GL();
4375
4376     if (wined3d_settings.strict_draw_ordering
4377             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4378             && surface->container.u.swapchain->front_buffer == surface))
4379         wglFlush();
4380
4381     context_release(context);
4382 }
4383
4384 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4385         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4386 {
4387     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4388     const struct wined3d_device *device = surface->resource.device;
4389     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4390     BOOL blit_supported = FALSE;
4391
4392     /* Copy the default values from the surface. Below we might perform fixups */
4393     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4394     *format = *surface->resource.format;
4395     *convert = NO_CONVERSION;
4396
4397     /* Ok, now look if we have to do any conversion */
4398     switch (surface->resource.format->id)
4399     {
4400         case WINED3DFMT_P8_UINT:
4401             /* Below the call to blit_supported is disabled for Wine 1.2
4402              * because the function isn't operating correctly yet. At the
4403              * moment 8-bit blits are handled in software and if certain GL
4404              * extensions are around, surface conversion is performed at
4405              * upload time. The blit_supported call recognizes it as a
4406              * destination fixup. This type of upload 'fixup' and 8-bit to
4407              * 8-bit blits need to be handled by the blit_shader.
4408              * TODO: get rid of this #if 0. */
4409 #if 0
4410             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4411                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4412                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4413 #endif
4414             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4415
4416             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4417              * texturing. Further also use conversion in case of color keying.
4418              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4419              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4420              * conflicts with this.
4421              */
4422             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4423                     || colorkey_active || !use_texturing)
4424             {
4425                 format->glFormat = GL_RGBA;
4426                 format->glInternal = GL_RGBA;
4427                 format->glType = GL_UNSIGNED_BYTE;
4428                 format->conv_byte_count = 4;
4429                 if (colorkey_active)
4430                     *convert = CONVERT_PALETTED_CK;
4431                 else
4432                     *convert = CONVERT_PALETTED;
4433             }
4434             break;
4435
4436         case WINED3DFMT_B2G3R3_UNORM:
4437             /* **********************
4438                 GL_UNSIGNED_BYTE_3_3_2
4439                 ********************** */
4440             if (colorkey_active) {
4441                 /* This texture format will never be used.. So do not care about color keying
4442                     up until the point in time it will be needed :-) */
4443                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4444             }
4445             break;
4446
4447         case WINED3DFMT_B5G6R5_UNORM:
4448             if (colorkey_active)
4449             {
4450                 *convert = CONVERT_CK_565;
4451                 format->glFormat = GL_RGBA;
4452                 format->glInternal = GL_RGB5_A1;
4453                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4454                 format->conv_byte_count = 2;
4455             }
4456             break;
4457
4458         case WINED3DFMT_B5G5R5X1_UNORM:
4459             if (colorkey_active)
4460             {
4461                 *convert = CONVERT_CK_5551;
4462                 format->glFormat = GL_BGRA;
4463                 format->glInternal = GL_RGB5_A1;
4464                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4465                 format->conv_byte_count = 2;
4466             }
4467             break;
4468
4469         case WINED3DFMT_B8G8R8_UNORM:
4470             if (colorkey_active)
4471             {
4472                 *convert = CONVERT_CK_RGB24;
4473                 format->glFormat = GL_RGBA;
4474                 format->glInternal = GL_RGBA8;
4475                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4476                 format->conv_byte_count = 4;
4477             }
4478             break;
4479
4480         case WINED3DFMT_B8G8R8X8_UNORM:
4481             if (colorkey_active)
4482             {
4483                 *convert = CONVERT_RGB32_888;
4484                 format->glFormat = GL_RGBA;
4485                 format->glInternal = GL_RGBA8;
4486                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4487                 format->conv_byte_count = 4;
4488             }
4489             break;
4490
4491         default:
4492             break;
4493     }
4494
4495     return WINED3D_OK;
4496 }
4497
4498 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4499 {
4500     const struct wined3d_device *device = surface->resource.device;
4501     const struct wined3d_palette *pal = surface->palette;
4502     BOOL index_in_alpha = FALSE;
4503     unsigned int i;
4504
4505     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4506      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4507      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4508      * duplicate entries. Store the color key in the unused alpha component to speed the
4509      * download up and to make conversion unneeded. */
4510     index_in_alpha = primary_render_target_is_p8(device);
4511
4512     if (!pal)
4513     {
4514         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4515         if (device->wined3d->flags & WINED3D_PALETTE_PER_SURFACE)
4516         {
4517             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4518             if (index_in_alpha)
4519             {
4520                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4521                  * there's no palette at this time. */
4522                 for (i = 0; i < 256; i++) table[i][3] = i;
4523             }
4524         }
4525         else
4526         {
4527             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4528              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4529              * capability flag is present (wine does advertise this capability) */
4530             for (i = 0; i < 256; ++i)
4531             {
4532                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4533                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4534                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4535                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4536             }
4537         }
4538     }
4539     else
4540     {
4541         TRACE("Using surface palette %p\n", pal);
4542         /* Get the surface's palette */
4543         for (i = 0; i < 256; ++i)
4544         {
4545             table[i][0] = pal->palents[i].peRed;
4546             table[i][1] = pal->palents[i].peGreen;
4547             table[i][2] = pal->palents[i].peBlue;
4548
4549             /* When index_in_alpha is set the palette index is stored in the
4550              * alpha component. In case of a readback we can then read
4551              * GL_ALPHA. Color keying is handled in BltOverride using a
4552              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4553              * color key itself is passed to glAlphaFunc in other cases the
4554              * alpha component of pixels that should be masked away is set to 0. */
4555             if (index_in_alpha)
4556             {
4557                 table[i][3] = i;
4558             }
4559             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4560                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4561             {
4562                 table[i][3] = 0x00;
4563             }
4564             else if (pal->flags & WINEDDPCAPS_ALPHA)
4565             {
4566                 table[i][3] = pal->palents[i].peFlags;
4567             }
4568             else
4569             {
4570                 table[i][3] = 0xFF;
4571             }
4572         }
4573     }
4574 }
4575
4576 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4577         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4578 {
4579     const BYTE *source;
4580     BYTE *dest;
4581     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4582
4583     switch (convert) {
4584         case NO_CONVERSION:
4585         {
4586             memcpy(dst, src, pitch * height);
4587             break;
4588         }
4589         case CONVERT_PALETTED:
4590         case CONVERT_PALETTED_CK:
4591         {
4592             BYTE table[256][4];
4593             unsigned int x, y;
4594
4595             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4596
4597             for (y = 0; y < height; y++)
4598             {
4599                 source = src + pitch * y;
4600                 dest = dst + outpitch * y;
4601                 /* This is an 1 bpp format, using the width here is fine */
4602                 for (x = 0; x < width; x++) {
4603                     BYTE color = *source++;
4604                     *dest++ = table[color][0];
4605                     *dest++ = table[color][1];
4606                     *dest++ = table[color][2];
4607                     *dest++ = table[color][3];
4608                 }
4609             }
4610         }
4611         break;
4612
4613         case CONVERT_CK_565:
4614         {
4615             /* Converting the 565 format in 5551 packed to emulate color-keying.
4616
4617               Note : in all these conversion, it would be best to average the averaging
4618                       pixels to get the color of the pixel that will be color-keyed to
4619                       prevent 'color bleeding'. This will be done later on if ever it is
4620                       too visible.
4621
4622               Note2: Nvidia documents say that their driver does not support alpha + color keying
4623                      on the same surface and disables color keying in such a case
4624             */
4625             unsigned int x, y;
4626             const WORD *Source;
4627             WORD *Dest;
4628
4629             TRACE("Color keyed 565\n");
4630
4631             for (y = 0; y < height; y++) {
4632                 Source = (const WORD *)(src + y * pitch);
4633                 Dest = (WORD *) (dst + y * outpitch);
4634                 for (x = 0; x < width; x++ ) {
4635                     WORD color = *Source++;
4636                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4637                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4638                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4639                         *Dest |= 0x0001;
4640                     Dest++;
4641                 }
4642             }
4643         }
4644         break;
4645
4646         case CONVERT_CK_5551:
4647         {
4648             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4649             unsigned int x, y;
4650             const WORD *Source;
4651             WORD *Dest;
4652             TRACE("Color keyed 5551\n");
4653             for (y = 0; y < height; y++) {
4654                 Source = (const WORD *)(src + y * pitch);
4655                 Dest = (WORD *) (dst + y * outpitch);
4656                 for (x = 0; x < width; x++ ) {
4657                     WORD color = *Source++;
4658                     *Dest = color;
4659                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4660                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4661                         *Dest |= (1 << 15);
4662                     else
4663                         *Dest &= ~(1 << 15);
4664                     Dest++;
4665                 }
4666             }
4667         }
4668         break;
4669
4670         case CONVERT_CK_RGB24:
4671         {
4672             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4673             unsigned int x, y;
4674             for (y = 0; y < height; y++)
4675             {
4676                 source = src + pitch * y;
4677                 dest = dst + outpitch * y;
4678                 for (x = 0; x < width; x++) {
4679                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4680                     DWORD dstcolor = color << 8;
4681                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4682                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4683                         dstcolor |= 0xff;
4684                     *(DWORD*)dest = dstcolor;
4685                     source += 3;
4686                     dest += 4;
4687                 }
4688             }
4689         }
4690         break;
4691
4692         case CONVERT_RGB32_888:
4693         {
4694             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4695             unsigned int x, y;
4696             for (y = 0; y < height; y++)
4697             {
4698                 source = src + pitch * y;
4699                 dest = dst + outpitch * y;
4700                 for (x = 0; x < width; x++) {
4701                     DWORD color = 0xffffff & *(const DWORD*)source;
4702                     DWORD dstcolor = color << 8;
4703                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4704                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4705                         dstcolor |= 0xff;
4706                     *(DWORD*)dest = dstcolor;
4707                     source += 4;
4708                     dest += 4;
4709                 }
4710             }
4711         }
4712         break;
4713
4714         default:
4715             ERR("Unsupported conversion type %#x.\n", convert);
4716     }
4717     return WINED3D_OK;
4718 }
4719
4720 BOOL palette9_changed(struct wined3d_surface *surface)
4721 {
4722     struct wined3d_device *device = surface->resource.device;
4723
4724     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4725             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4726     {
4727         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4728          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4729          */
4730         return FALSE;
4731     }
4732
4733     if (surface->palette9)
4734     {
4735         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4736         {
4737             return FALSE;
4738         }
4739     }
4740     else
4741     {
4742         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4743     }
4744     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4745
4746     return TRUE;
4747 }
4748
4749 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4750 {
4751     /* Flip the surface contents */
4752     /* Flip the DC */
4753     {
4754         HDC tmp;
4755         tmp = front->hDC;
4756         front->hDC = back->hDC;
4757         back->hDC = tmp;
4758     }
4759
4760     /* Flip the DIBsection */
4761     {
4762         HBITMAP tmp;
4763         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4764         tmp = front->dib.DIBsection;
4765         front->dib.DIBsection = back->dib.DIBsection;
4766         back->dib.DIBsection = tmp;
4767
4768         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4769         else front->flags &= ~SFLAG_DIBSECTION;
4770         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4771         else back->flags &= ~SFLAG_DIBSECTION;
4772     }
4773
4774     /* Flip the surface data */
4775     {
4776         void* tmp;
4777
4778         tmp = front->dib.bitmap_data;
4779         front->dib.bitmap_data = back->dib.bitmap_data;
4780         back->dib.bitmap_data = tmp;
4781
4782         tmp = front->resource.allocatedMemory;
4783         front->resource.allocatedMemory = back->resource.allocatedMemory;
4784         back->resource.allocatedMemory = tmp;
4785
4786         tmp = front->resource.heapMemory;
4787         front->resource.heapMemory = back->resource.heapMemory;
4788         back->resource.heapMemory = tmp;
4789     }
4790
4791     /* Flip the PBO */
4792     {
4793         GLuint tmp_pbo = front->pbo;
4794         front->pbo = back->pbo;
4795         back->pbo = tmp_pbo;
4796     }
4797
4798     /* client_memory should not be different, but just in case */
4799     {
4800         BOOL tmp;
4801         tmp = front->dib.client_memory;
4802         front->dib.client_memory = back->dib.client_memory;
4803         back->dib.client_memory = tmp;
4804     }
4805
4806     /* Flip the opengl texture */
4807     {
4808         GLuint tmp;
4809
4810         tmp = back->texture_name;
4811         back->texture_name = front->texture_name;
4812         front->texture_name = tmp;
4813
4814         tmp = back->texture_name_srgb;
4815         back->texture_name_srgb = front->texture_name_srgb;
4816         front->texture_name_srgb = tmp;
4817
4818         tmp = back->rb_multisample;
4819         back->rb_multisample = front->rb_multisample;
4820         front->rb_multisample = tmp;
4821
4822         tmp = back->rb_resolved;
4823         back->rb_resolved = front->rb_resolved;
4824         front->rb_resolved = tmp;
4825
4826         resource_unload(&back->resource);
4827         resource_unload(&front->resource);
4828     }
4829
4830     {
4831         DWORD tmp_flags = back->flags;
4832         back->flags = front->flags;
4833         front->flags = tmp_flags;
4834     }
4835 }
4836
4837 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4838  * pixel copy calls. */
4839 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4840         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4841 {
4842     struct wined3d_device *device = dst_surface->resource.device;
4843     float xrel, yrel;
4844     UINT row;
4845     struct wined3d_context *context;
4846     BOOL upsidedown = FALSE;
4847     RECT dst_rect = *dst_rect_in;
4848
4849     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4850      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4851      */
4852     if(dst_rect.top > dst_rect.bottom) {
4853         UINT tmp = dst_rect.bottom;
4854         dst_rect.bottom = dst_rect.top;
4855         dst_rect.top = tmp;
4856         upsidedown = TRUE;
4857     }
4858
4859     context = context_acquire(device, src_surface);
4860     context_apply_blit_state(context, device);
4861     surface_internal_preload(dst_surface, SRGB_RGB);
4862     ENTER_GL();
4863
4864     /* Bind the target texture */
4865     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4866     if (surface_is_offscreen(src_surface))
4867     {
4868         TRACE("Reading from an offscreen target\n");
4869         upsidedown = !upsidedown;
4870         glReadBuffer(device->offscreenBuffer);
4871     }
4872     else
4873     {
4874         glReadBuffer(surface_get_gl_buffer(src_surface));
4875     }
4876     checkGLcall("glReadBuffer");
4877
4878     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4879     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4880
4881     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4882     {
4883         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4884
4885         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4886             ERR("Texture filtering not supported in direct blit\n");
4887         }
4888     }
4889     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4890             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4891     {
4892         ERR("Texture filtering not supported in direct blit\n");
4893     }
4894
4895     if (upsidedown
4896             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4897             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4898     {
4899         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4900
4901         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4902                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4903                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4904                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4905     }
4906     else
4907     {
4908         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4909         /* I have to process this row by row to swap the image,
4910          * otherwise it would be upside down, so stretching in y direction
4911          * doesn't cost extra time
4912          *
4913          * However, stretching in x direction can be avoided if not necessary
4914          */
4915         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4916             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4917             {
4918                 /* Well, that stuff works, but it's very slow.
4919                  * find a better way instead
4920                  */
4921                 UINT col;
4922
4923                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4924                 {
4925                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4926                             dst_rect.left + col /* x offset */, row /* y offset */,
4927                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4928                 }
4929             }
4930             else
4931             {
4932                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4933                         dst_rect.left /* x offset */, row /* y offset */,
4934                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4935             }
4936         }
4937     }
4938     checkGLcall("glCopyTexSubImage2D");
4939
4940     LEAVE_GL();
4941     context_release(context);
4942
4943     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4944      * path is never entered
4945      */
4946     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4947 }
4948
4949 /* Uses the hardware to stretch and flip the image */
4950 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4951         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4952 {
4953     struct wined3d_device *device = dst_surface->resource.device;
4954     struct wined3d_swapchain *src_swapchain = NULL;
4955     GLuint src, backup = 0;
4956     float left, right, top, bottom; /* Texture coordinates */
4957     UINT fbwidth = src_surface->resource.width;
4958     UINT fbheight = src_surface->resource.height;
4959     struct wined3d_context *context;
4960     GLenum drawBuffer = GL_BACK;
4961     GLenum texture_target;
4962     BOOL noBackBufferBackup;
4963     BOOL src_offscreen;
4964     BOOL upsidedown = FALSE;
4965     RECT dst_rect = *dst_rect_in;
4966
4967     TRACE("Using hwstretch blit\n");
4968     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4969     context = context_acquire(device, src_surface);
4970     context_apply_blit_state(context, device);
4971     surface_internal_preload(dst_surface, SRGB_RGB);
4972
4973     src_offscreen = surface_is_offscreen(src_surface);
4974     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4975     if (!noBackBufferBackup && !src_surface->texture_name)
4976     {
4977         /* Get it a description */
4978         surface_internal_preload(src_surface, SRGB_RGB);
4979     }
4980     ENTER_GL();
4981
4982     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4983      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4984      */
4985     if (context->aux_buffers >= 2)
4986     {
4987         /* Got more than one aux buffer? Use the 2nd aux buffer */
4988         drawBuffer = GL_AUX1;
4989     }
4990     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4991     {
4992         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4993         drawBuffer = GL_AUX0;
4994     }
4995
4996     if(noBackBufferBackup) {
4997         glGenTextures(1, &backup);
4998         checkGLcall("glGenTextures");
4999         context_bind_texture(context, GL_TEXTURE_2D, backup);
5000         texture_target = GL_TEXTURE_2D;
5001     } else {
5002         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5003          * we are reading from the back buffer, the backup can be used as source texture
5004          */
5005         texture_target = src_surface->texture_target;
5006         context_bind_texture(context, texture_target, src_surface->texture_name);
5007         glEnable(texture_target);
5008         checkGLcall("glEnable(texture_target)");
5009
5010         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5011         src_surface->flags &= ~SFLAG_INTEXTURE;
5012     }
5013
5014     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5015      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5016      */
5017     if(dst_rect.top > dst_rect.bottom) {
5018         UINT tmp = dst_rect.bottom;
5019         dst_rect.bottom = dst_rect.top;
5020         dst_rect.top = tmp;
5021         upsidedown = TRUE;
5022     }
5023
5024     if (src_offscreen)
5025     {
5026         TRACE("Reading from an offscreen target\n");
5027         upsidedown = !upsidedown;
5028         glReadBuffer(device->offscreenBuffer);
5029     }
5030     else
5031     {
5032         glReadBuffer(surface_get_gl_buffer(src_surface));
5033     }
5034
5035     /* TODO: Only back up the part that will be overwritten */
5036     glCopyTexSubImage2D(texture_target, 0,
5037                         0, 0 /* read offsets */,
5038                         0, 0,
5039                         fbwidth,
5040                         fbheight);
5041
5042     checkGLcall("glCopyTexSubImage2D");
5043
5044     /* No issue with overriding these - the sampler is dirty due to blit usage */
5045     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5046             wined3d_gl_mag_filter(magLookup, Filter));
5047     checkGLcall("glTexParameteri");
5048     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5049             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5050     checkGLcall("glTexParameteri");
5051
5052     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5053         src_swapchain = src_surface->container.u.swapchain;
5054     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5055     {
5056         src = backup ? backup : src_surface->texture_name;
5057     }
5058     else
5059     {
5060         glReadBuffer(GL_FRONT);
5061         checkGLcall("glReadBuffer(GL_FRONT)");
5062
5063         glGenTextures(1, &src);
5064         checkGLcall("glGenTextures(1, &src)");
5065         context_bind_texture(context, GL_TEXTURE_2D, src);
5066
5067         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5068          * out for power of 2 sizes
5069          */
5070         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5071                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5072         checkGLcall("glTexImage2D");
5073         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5074                             0, 0 /* read offsets */,
5075                             0, 0,
5076                             fbwidth,
5077                             fbheight);
5078
5079         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5080         checkGLcall("glTexParameteri");
5081         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5082         checkGLcall("glTexParameteri");
5083
5084         glReadBuffer(GL_BACK);
5085         checkGLcall("glReadBuffer(GL_BACK)");
5086
5087         if(texture_target != GL_TEXTURE_2D) {
5088             glDisable(texture_target);
5089             glEnable(GL_TEXTURE_2D);
5090             texture_target = GL_TEXTURE_2D;
5091         }
5092     }
5093     checkGLcall("glEnd and previous");
5094
5095     left = src_rect->left;
5096     right = src_rect->right;
5097
5098     if (!upsidedown)
5099     {
5100         top = src_surface->resource.height - src_rect->top;
5101         bottom = src_surface->resource.height - src_rect->bottom;
5102     }
5103     else
5104     {
5105         top = src_surface->resource.height - src_rect->bottom;
5106         bottom = src_surface->resource.height - src_rect->top;
5107     }
5108
5109     if (src_surface->flags & SFLAG_NORMCOORD)
5110     {
5111         left /= src_surface->pow2Width;
5112         right /= src_surface->pow2Width;
5113         top /= src_surface->pow2Height;
5114         bottom /= src_surface->pow2Height;
5115     }
5116
5117     /* draw the source texture stretched and upside down. The correct surface is bound already */
5118     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5119     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5120
5121     context_set_draw_buffer(context, drawBuffer);
5122     glReadBuffer(drawBuffer);
5123
5124     glBegin(GL_QUADS);
5125         /* bottom left */
5126         glTexCoord2f(left, bottom);
5127         glVertex2i(0, 0);
5128
5129         /* top left */
5130         glTexCoord2f(left, top);
5131         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5132
5133         /* top right */
5134         glTexCoord2f(right, top);
5135         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5136
5137         /* bottom right */
5138         glTexCoord2f(right, bottom);
5139         glVertex2i(dst_rect.right - dst_rect.left, 0);
5140     glEnd();
5141     checkGLcall("glEnd and previous");
5142
5143     if (texture_target != dst_surface->texture_target)
5144     {
5145         glDisable(texture_target);
5146         glEnable(dst_surface->texture_target);
5147         texture_target = dst_surface->texture_target;
5148     }
5149
5150     /* Now read the stretched and upside down image into the destination texture */
5151     context_bind_texture(context, texture_target, dst_surface->texture_name);
5152     glCopyTexSubImage2D(texture_target,
5153                         0,
5154                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5155                         0, 0, /* We blitted the image to the origin */
5156                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5157     checkGLcall("glCopyTexSubImage2D");
5158
5159     if(drawBuffer == GL_BACK) {
5160         /* Write the back buffer backup back */
5161         if(backup) {
5162             if(texture_target != GL_TEXTURE_2D) {
5163                 glDisable(texture_target);
5164                 glEnable(GL_TEXTURE_2D);
5165                 texture_target = GL_TEXTURE_2D;
5166             }
5167             context_bind_texture(context, GL_TEXTURE_2D, backup);
5168         }
5169         else
5170         {
5171             if (texture_target != src_surface->texture_target)
5172             {
5173                 glDisable(texture_target);
5174                 glEnable(src_surface->texture_target);
5175                 texture_target = src_surface->texture_target;
5176             }
5177             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5178         }
5179
5180         glBegin(GL_QUADS);
5181             /* top left */
5182             glTexCoord2f(0.0f, 0.0f);
5183             glVertex2i(0, fbheight);
5184
5185             /* bottom left */
5186             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5187             glVertex2i(0, 0);
5188
5189             /* bottom right */
5190             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5191                     (float)fbheight / (float)src_surface->pow2Height);
5192             glVertex2i(fbwidth, 0);
5193
5194             /* top right */
5195             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5196             glVertex2i(fbwidth, fbheight);
5197         glEnd();
5198     }
5199     glDisable(texture_target);
5200     checkGLcall("glDisable(texture_target)");
5201
5202     /* Cleanup */
5203     if (src != src_surface->texture_name && src != backup)
5204     {
5205         glDeleteTextures(1, &src);
5206         checkGLcall("glDeleteTextures(1, &src)");
5207     }
5208     if(backup) {
5209         glDeleteTextures(1, &backup);
5210         checkGLcall("glDeleteTextures(1, &backup)");
5211     }
5212
5213     LEAVE_GL();
5214
5215     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5216
5217     context_release(context);
5218
5219     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5220      * path is never entered
5221      */
5222     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5223 }
5224
5225 /* Front buffer coordinates are always full screen coordinates, but our GL
5226  * drawable is limited to the window's client area. The sysmem and texture
5227  * copies do have the full screen size. Note that GL has a bottom-left
5228  * origin, while D3D has a top-left origin. */
5229 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5230 {
5231     UINT drawable_height;
5232
5233     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5234             && surface == surface->container.u.swapchain->front_buffer)
5235     {
5236         POINT offset = {0, 0};
5237         RECT windowsize;
5238
5239         ScreenToClient(window, &offset);
5240         OffsetRect(rect, offset.x, offset.y);
5241
5242         GetClientRect(window, &windowsize);
5243         drawable_height = windowsize.bottom - windowsize.top;
5244     }
5245     else
5246     {
5247         drawable_height = surface->resource.height;
5248     }
5249
5250     rect->top = drawable_height - rect->top;
5251     rect->bottom = drawable_height - rect->bottom;
5252 }
5253
5254 static void surface_blt_to_drawable(struct wined3d_device *device,
5255         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5256         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5257         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5258 {
5259     struct wined3d_context *context;
5260     RECT src_rect, dst_rect;
5261
5262     src_rect = *src_rect_in;
5263     dst_rect = *dst_rect_in;
5264
5265     /* Make sure the surface is up-to-date. This should probably use
5266      * surface_load_location() and worry about the destination surface too,
5267      * unless we're overwriting it completely. */
5268     surface_internal_preload(src_surface, SRGB_RGB);
5269
5270     /* Activate the destination context, set it up for blitting */
5271     context = context_acquire(device, dst_surface);
5272     context_apply_blit_state(context, device);
5273
5274     if (!surface_is_offscreen(dst_surface))
5275         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5276
5277     device->blitter->set_shader(device->blit_priv, context, src_surface);
5278
5279     ENTER_GL();
5280
5281     if (color_key)
5282     {
5283         glEnable(GL_ALPHA_TEST);
5284         checkGLcall("glEnable(GL_ALPHA_TEST)");
5285
5286         /* When the primary render target uses P8, the alpha component
5287          * contains the palette index. Which means that the colorkey is one of
5288          * the palette entries. In other cases pixels that should be masked
5289          * away have alpha set to 0. */
5290         if (primary_render_target_is_p8(device))
5291             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5292         else
5293             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5294         checkGLcall("glAlphaFunc");
5295     }
5296     else
5297     {
5298         glDisable(GL_ALPHA_TEST);
5299         checkGLcall("glDisable(GL_ALPHA_TEST)");
5300     }
5301
5302     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5303
5304     if (color_key)
5305     {
5306         glDisable(GL_ALPHA_TEST);
5307         checkGLcall("glDisable(GL_ALPHA_TEST)");
5308     }
5309
5310     LEAVE_GL();
5311
5312     /* Leave the opengl state valid for blitting */
5313     device->blitter->unset_shader(context->gl_info);
5314
5315     if (wined3d_settings.strict_draw_ordering
5316             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5317             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5318         wglFlush(); /* Flush to ensure ordering across contexts. */
5319
5320     context_release(context);
5321 }
5322
5323 /* Do not call while under the GL lock. */
5324 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5325 {
5326     struct wined3d_device *device = s->resource.device;
5327     const struct blit_shader *blitter;
5328
5329     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5330             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5331     if (!blitter)
5332     {
5333         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5334         return WINED3DERR_INVALIDCALL;
5335     }
5336
5337     return blitter->color_fill(device, s, rect, color);
5338 }
5339
5340 /* Do not call while under the GL lock. */
5341 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5342         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5343         WINED3DTEXTUREFILTERTYPE Filter)
5344 {
5345     struct wined3d_device *device = dst_surface->resource.device;
5346     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5347     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5348
5349     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5350             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5351             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5352
5353     /* Get the swapchain. One of the surfaces has to be a primary surface */
5354     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5355     {
5356         WARN("Destination is in sysmem, rejecting gl blt\n");
5357         return WINED3DERR_INVALIDCALL;
5358     }
5359
5360     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5361         dstSwapchain = dst_surface->container.u.swapchain;
5362
5363     if (src_surface)
5364     {
5365         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5366         {
5367             WARN("Src is in sysmem, rejecting gl blt\n");
5368             return WINED3DERR_INVALIDCALL;
5369         }
5370
5371         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5372             srcSwapchain = src_surface->container.u.swapchain;
5373     }
5374
5375     /* Early sort out of cases where no render target is used */
5376     if (!dstSwapchain && !srcSwapchain
5377             && src_surface != device->fb.render_targets[0]
5378             && dst_surface != device->fb.render_targets[0])
5379     {
5380         TRACE("No surface is render target, not using hardware blit.\n");
5381         return WINED3DERR_INVALIDCALL;
5382     }
5383
5384     /* No destination color keying supported */
5385     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5386     {
5387         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5388         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5389         return WINED3DERR_INVALIDCALL;
5390     }
5391
5392     /* The only case where both surfaces on a swapchain are supported is a back buffer -> front buffer blit on the same swapchain */
5393     if (dstSwapchain && dstSwapchain == srcSwapchain && dstSwapchain->back_buffers
5394             && dst_surface == dstSwapchain->front_buffer
5395             && src_surface == dstSwapchain->back_buffers[0])
5396     {
5397         /* Half-Life does a Blt from the back buffer to the front buffer,
5398          * Full surface size, no flags... Use present instead
5399          *
5400          * This path will only be entered for d3d7 and ddraw apps, because d3d8/9 offer no way to blit TO the front buffer
5401          */
5402
5403         /* Check rects - wined3d_swapchain_present() doesn't handle them. */
5404         for (;;)
5405         {
5406             TRACE("Looking if a Present can be done...\n");
5407             /* Source Rectangle must be full surface */
5408             if (src_rect->left || src_rect->top
5409                     || src_rect->right != src_surface->resource.width
5410                     || src_rect->bottom != src_surface->resource.height)
5411             {
5412                 TRACE("No, Source rectangle doesn't match\n");
5413                 break;
5414             }
5415
5416             /* No stretching may occur */
5417             if (src_rect->right != dst_rect->right - dst_rect->left
5418                     || src_rect->bottom != dst_rect->bottom - dst_rect->top)
5419             {
5420                 TRACE("No, stretching is done\n");
5421                 break;
5422             }
5423
5424             /* Destination must be full surface or match the clipping rectangle */
5425             if (dst_surface->clipper && dst_surface->clipper->hWnd)
5426             {
5427                 RECT cliprect;
5428                 POINT pos[2];
5429                 GetClientRect(dst_surface->clipper->hWnd, &cliprect);
5430                 pos[0].x = dst_rect->left;
5431                 pos[0].y = dst_rect->top;
5432                 pos[1].x = dst_rect->right;
5433                 pos[1].y = dst_rect->bottom;
5434                 MapWindowPoints(GetDesktopWindow(), dst_surface->clipper->hWnd, pos, 2);
5435
5436                 if (pos[0].x != cliprect.left || pos[0].y != cliprect.top
5437                         || pos[1].x != cliprect.right || pos[1].y != cliprect.bottom)
5438                 {
5439                     TRACE("No, dest rectangle doesn't match(clipper)\n");
5440                     TRACE("Clip rect at %s\n", wine_dbgstr_rect(&cliprect));
5441                     TRACE("Blt dest: %s\n", wine_dbgstr_rect(dst_rect));
5442                     break;
5443                 }
5444             }
5445             else if (dst_rect->left || dst_rect->top
5446                     || dst_rect->right != dst_surface->resource.width
5447                     || dst_rect->bottom != dst_surface->resource.height)
5448             {
5449                 TRACE("No, dest rectangle doesn't match(surface size)\n");
5450                 break;
5451             }
5452
5453             TRACE("Yes\n");
5454
5455             /* These flags are unimportant for the flag check, remove them */
5456             if (!(flags & ~(WINEDDBLT_DONOTWAIT | WINEDDBLT_WAIT)))
5457             {
5458                 WINED3DSWAPEFFECT orig_swap = dstSwapchain->presentParms.SwapEffect;
5459
5460                 /* The idea behind this is that a glReadPixels and a glDrawPixels call
5461                     * take very long, while a flip is fast.
5462                     * This applies to Half-Life, which does such Blts every time it finished
5463                     * a frame, and to Prince of Persia 3D, which uses this to draw at least the main
5464                     * menu. This is also used by all apps when they do windowed rendering
5465                     *
5466                     * The problem is that flipping is not really the same as copying. After a
5467                     * Blt the front buffer is a copy of the back buffer, and the back buffer is
5468                     * untouched. Therefore it's necessary to override the swap effect
5469                     * and to set it back after the flip.
5470                     *
5471                     * Windowed Direct3D < 7 apps do the same. The D3D7 sdk demos are nice
5472                     * testcases.
5473                     */
5474
5475                 dstSwapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
5476                 dstSwapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
5477
5478                 TRACE("Full screen back buffer -> front buffer blt, performing a flip instead.\n");
5479                 wined3d_swapchain_present(dstSwapchain, NULL, NULL, dstSwapchain->win_handle, NULL, 0);
5480
5481                 dstSwapchain->presentParms.SwapEffect = orig_swap;
5482
5483                 return WINED3D_OK;
5484             }
5485             break;
5486         }
5487
5488         TRACE("Unsupported blit between buffers on the same swapchain\n");
5489         return WINED3DERR_INVALIDCALL;
5490     } else if(dstSwapchain && dstSwapchain == srcSwapchain) {
5491         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5492         return WINED3DERR_INVALIDCALL;
5493     } else if(dstSwapchain && srcSwapchain) {
5494         FIXME("Implement hardware blit between two different swapchains\n");
5495         return WINED3DERR_INVALIDCALL;
5496     }
5497     else if (dstSwapchain)
5498     {
5499         /* Handled with regular texture -> swapchain blit */
5500         if (src_surface == device->fb.render_targets[0])
5501             TRACE("Blit from active render target to a swapchain\n");
5502     }
5503     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5504     {
5505         FIXME("Implement blit from a swapchain to the active render target\n");
5506         return WINED3DERR_INVALIDCALL;
5507     }
5508
5509     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5510     {
5511         /* Blit from render target to texture */
5512         BOOL stretchx;
5513
5514         /* P8 read back is not implemented */
5515         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5516                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5517         {
5518             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5519             return WINED3DERR_INVALIDCALL;
5520         }
5521
5522         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5523         {
5524             TRACE("Color keying not supported by frame buffer to texture blit\n");
5525             return WINED3DERR_INVALIDCALL;
5526             /* Destination color key is checked above */
5527         }
5528
5529         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5530             stretchx = TRUE;
5531         else
5532             stretchx = FALSE;
5533
5534         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5535          * flip the image nor scale it.
5536          *
5537          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5538          * -> If the app wants a image width an unscaled width, copy it line per line
5539          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5540          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5541          *    back buffer. This is slower than reading line per line, thus not used for flipping
5542          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5543          *    pixel by pixel. */
5544         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5545                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5546         {
5547             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5548             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5549         } else {
5550             TRACE("Using hardware stretching to flip / stretch the texture\n");
5551             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5552         }
5553
5554         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5555         {
5556             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5557             dst_surface->resource.allocatedMemory = NULL;
5558             dst_surface->resource.heapMemory = NULL;
5559         }
5560         else
5561         {
5562             dst_surface->flags &= ~SFLAG_INSYSMEM;
5563         }
5564
5565         return WINED3D_OK;
5566     }
5567     else if (src_surface)
5568     {
5569         /* Blit from offscreen surface to render target */
5570         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5571         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5572
5573         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5574
5575         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5576                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5577                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5578         {
5579             FIXME("Unsupported blit operation falling back to software\n");
5580             return WINED3DERR_INVALIDCALL;
5581         }
5582
5583         /* Color keying: Check if we have to do a color keyed blt,
5584          * and if not check if a color key is activated.
5585          *
5586          * Just modify the color keying parameters in the surface and restore them afterwards
5587          * The surface keeps track of the color key last used to load the opengl surface.
5588          * PreLoad will catch the change to the flags and color key and reload if necessary.
5589          */
5590         if (flags & WINEDDBLT_KEYSRC)
5591         {
5592             /* Use color key from surface */
5593         }
5594         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5595         {
5596             /* Use color key from DDBltFx */
5597             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5598             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5599         }
5600         else
5601         {
5602             /* Do not use color key */
5603             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5604         }
5605
5606         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5607                 src_surface, src_rect, dst_surface, dst_rect);
5608
5609         /* Restore the color key parameters */
5610         src_surface->CKeyFlags = oldCKeyFlags;
5611         src_surface->SrcBltCKey = oldBltCKey;
5612
5613         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5614
5615         return WINED3D_OK;
5616     }
5617
5618     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5619     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5620     return WINED3DERR_INVALIDCALL;
5621 }
5622
5623 /* GL locking is done by the caller */
5624 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5625         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5626 {
5627     struct wined3d_device *device = surface->resource.device;
5628     const struct wined3d_gl_info *gl_info = context->gl_info;
5629     GLint compare_mode = GL_NONE;
5630     struct blt_info info;
5631     GLint old_binding = 0;
5632     RECT rect;
5633
5634     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5635
5636     glDisable(GL_CULL_FACE);
5637     glDisable(GL_BLEND);
5638     glDisable(GL_ALPHA_TEST);
5639     glDisable(GL_SCISSOR_TEST);
5640     glDisable(GL_STENCIL_TEST);
5641     glEnable(GL_DEPTH_TEST);
5642     glDepthFunc(GL_ALWAYS);
5643     glDepthMask(GL_TRUE);
5644     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5645     glViewport(x, y, w, h);
5646
5647     SetRect(&rect, 0, h, w, 0);
5648     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5649     context_active_texture(context, context->gl_info, 0);
5650     glGetIntegerv(info.binding, &old_binding);
5651     glBindTexture(info.bind_target, texture);
5652     if (gl_info->supported[ARB_SHADOW])
5653     {
5654         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5655         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5656     }
5657
5658     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5659             gl_info, info.tex_type, &surface->ds_current_size);
5660
5661     glBegin(GL_TRIANGLE_STRIP);
5662     glTexCoord3fv(info.coords[0]);
5663     glVertex2f(-1.0f, -1.0f);
5664     glTexCoord3fv(info.coords[1]);
5665     glVertex2f(1.0f, -1.0f);
5666     glTexCoord3fv(info.coords[2]);
5667     glVertex2f(-1.0f, 1.0f);
5668     glTexCoord3fv(info.coords[3]);
5669     glVertex2f(1.0f, 1.0f);
5670     glEnd();
5671
5672     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5673     glBindTexture(info.bind_target, old_binding);
5674
5675     glPopAttrib();
5676
5677     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5678 }
5679
5680 void surface_modify_ds_location(struct wined3d_surface *surface,
5681         DWORD location, UINT w, UINT h)
5682 {
5683     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5684
5685     if (location & ~SFLAG_DS_LOCATIONS)
5686         FIXME("Invalid location (%#x) specified.\n", location);
5687
5688     surface->ds_current_size.cx = w;
5689     surface->ds_current_size.cy = h;
5690     surface->flags &= ~SFLAG_DS_LOCATIONS;
5691     surface->flags |= location;
5692 }
5693
5694 /* Context activation is done by the caller. */
5695 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5696 {
5697     struct wined3d_device *device = surface->resource.device;
5698     GLsizei w, h;
5699
5700     TRACE("surface %p, new location %#x.\n", surface, location);
5701
5702     /* TODO: Make this work for modes other than FBO */
5703     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5704
5705     if (!(surface->flags & location))
5706     {
5707         w = surface->ds_current_size.cx;
5708         h = surface->ds_current_size.cy;
5709         surface->ds_current_size.cx = 0;
5710         surface->ds_current_size.cy = 0;
5711     }
5712     else
5713     {
5714         w = surface->resource.width;
5715         h = surface->resource.height;
5716     }
5717
5718     if (surface->ds_current_size.cx == surface->resource.width
5719             && surface->ds_current_size.cy == surface->resource.height)
5720     {
5721         TRACE("Location (%#x) is already up to date.\n", location);
5722         return;
5723     }
5724
5725     if (surface->current_renderbuffer)
5726     {
5727         FIXME("Not supported with fixed up depth stencil.\n");
5728         return;
5729     }
5730
5731     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5732     {
5733         /* This mostly happens when a depth / stencil is used without being
5734          * cleared first. In principle we could upload from sysmem, or
5735          * explicitly clear before first usage. For the moment there don't
5736          * appear to be a lot of applications depending on this, so a FIXME
5737          * should do. */
5738         FIXME("No up to date depth stencil location.\n");
5739         surface->flags |= location;
5740         surface->ds_current_size.cx = surface->resource.width;
5741         surface->ds_current_size.cy = surface->resource.height;
5742         return;
5743     }
5744
5745     if (location == SFLAG_DS_OFFSCREEN)
5746     {
5747         GLint old_binding = 0;
5748         GLenum bind_target;
5749
5750         /* The render target is allowed to be smaller than the depth/stencil
5751          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5752          * than the offscreen surface. Don't overwrite the offscreen surface
5753          * with undefined data. */
5754         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5755         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5756
5757         TRACE("Copying onscreen depth buffer to depth texture.\n");
5758
5759         ENTER_GL();
5760
5761         if (!device->depth_blt_texture)
5762         {
5763             glGenTextures(1, &device->depth_blt_texture);
5764         }
5765
5766         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5767          * directly on the FBO texture. That's because we need to flip. */
5768         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5769                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5770         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5771         {
5772             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5773             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5774         }
5775         else
5776         {
5777             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5778             bind_target = GL_TEXTURE_2D;
5779         }
5780         glBindTexture(bind_target, device->depth_blt_texture);
5781         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5782          * internal format, because the internal format might include stencil
5783          * data. In principle we should copy stencil data as well, but unless
5784          * the driver supports stencil export it's hard to do, and doesn't
5785          * seem to be needed in practice. If the hardware doesn't support
5786          * writing stencil data, the glCopyTexImage2D() call might trigger
5787          * software fallbacks. */
5788         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5789         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5790         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5791         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5792         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5793         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5794         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5795         glBindTexture(bind_target, old_binding);
5796
5797         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5798                 NULL, surface, SFLAG_INTEXTURE);
5799         context_set_draw_buffer(context, GL_NONE);
5800         glReadBuffer(GL_NONE);
5801
5802         /* Do the actual blit */
5803         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5804         checkGLcall("depth_blt");
5805
5806         context_invalidate_state(context, STATE_FRAMEBUFFER);
5807
5808         LEAVE_GL();
5809
5810         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5811     }
5812     else if (location == SFLAG_DS_ONSCREEN)
5813     {
5814         TRACE("Copying depth texture to onscreen depth buffer.\n");
5815
5816         ENTER_GL();
5817
5818         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5819                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5820         surface_depth_blt(surface, context, surface->texture_name,
5821                 0, surface->pow2Height - h, w, h, surface->texture_target);
5822         checkGLcall("depth_blt");
5823
5824         context_invalidate_state(context, STATE_FRAMEBUFFER);
5825
5826         LEAVE_GL();
5827
5828         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5829     }
5830     else
5831     {
5832         ERR("Invalid location (%#x) specified.\n", location);
5833     }
5834
5835     surface->flags |= location;
5836     surface->ds_current_size.cx = surface->resource.width;
5837     surface->ds_current_size.cy = surface->resource.height;
5838 }
5839
5840 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5841 {
5842     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5843     struct wined3d_surface *overlay;
5844
5845     TRACE("surface %p, location %s, persistent %#x.\n",
5846             surface, debug_surflocation(location), persistent);
5847
5848     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5849             && (location & SFLAG_INDRAWABLE))
5850         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5851
5852     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5853             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5854         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5855
5856     if (persistent)
5857     {
5858         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5859                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5860         {
5861             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5862             {
5863                 TRACE("Passing to container.\n");
5864                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5865             }
5866         }
5867         surface->flags &= ~SFLAG_LOCATIONS;
5868         surface->flags |= location;
5869
5870         /* Redraw emulated overlays, if any */
5871         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5872         {
5873             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5874             {
5875                 overlay->surface_ops->surface_draw_overlay(overlay);
5876             }
5877         }
5878     }
5879     else
5880     {
5881         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5882         {
5883             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5884             {
5885                 TRACE("Passing to container\n");
5886                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5887             }
5888         }
5889         surface->flags &= ~location;
5890     }
5891
5892     if (!(surface->flags & SFLAG_LOCATIONS))
5893     {
5894         ERR("Surface %p does not have any up to date location.\n", surface);
5895     }
5896 }
5897
5898 static DWORD resource_access_from_location(DWORD location)
5899 {
5900     switch (location)
5901     {
5902         case SFLAG_INSYSMEM:
5903             return WINED3D_RESOURCE_ACCESS_CPU;
5904
5905         case SFLAG_INDRAWABLE:
5906         case SFLAG_INSRGBTEX:
5907         case SFLAG_INTEXTURE:
5908         case SFLAG_INRB_MULTISAMPLE:
5909         case SFLAG_INRB_RESOLVED:
5910             return WINED3D_RESOURCE_ACCESS_GPU;
5911
5912         default:
5913             FIXME("Unhandled location %#x.\n", location);
5914             return 0;
5915     }
5916 }
5917
5918 static void surface_load_sysmem(struct wined3d_surface *surface,
5919         const struct wined3d_gl_info *gl_info, const RECT *rect)
5920 {
5921     surface_prepare_system_memory(surface);
5922
5923     /* Download the surface to system memory. */
5924     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5925     {
5926         struct wined3d_device *device = surface->resource.device;
5927         struct wined3d_context *context;
5928
5929         /* TODO: Use already acquired context when possible. */
5930         context = context_acquire(device, NULL);
5931
5932         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5933         surface_download_data(surface, gl_info);
5934
5935         context_release(context);
5936
5937         return;
5938     }
5939
5940     /* Note: It might be faster to download into a texture first. */
5941     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5942             wined3d_surface_get_pitch(surface));
5943 }
5944
5945 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5946         const struct wined3d_gl_info *gl_info, const RECT *rect)
5947 {
5948     struct wined3d_device *device = surface->resource.device;
5949     struct wined3d_format format;
5950     CONVERT_TYPES convert;
5951     UINT byte_count;
5952     BYTE *mem;
5953
5954     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5955     {
5956         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5957         return WINED3DERR_INVALIDCALL;
5958     }
5959
5960     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5961         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5962
5963     if (surface->flags & SFLAG_INTEXTURE)
5964     {
5965         RECT r;
5966
5967         surface_get_rect(surface, rect, &r);
5968         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5969
5970         return WINED3D_OK;
5971     }
5972
5973     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5974     {
5975         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5976          * path through sysmem. */
5977         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5978     }
5979
5980     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5981
5982     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5983      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5984      * called. */
5985     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5986     {
5987         struct wined3d_context *context;
5988
5989         TRACE("Removing the pbo attached to surface %p.\n", surface);
5990
5991         /* TODO: Use already acquired context when possible. */
5992         context = context_acquire(device, NULL);
5993
5994         surface_remove_pbo(surface, gl_info);
5995
5996         context_release(context);
5997     }
5998
5999     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
6000     {
6001         UINT height = surface->resource.height;
6002         UINT width = surface->resource.width;
6003         UINT src_pitch, dst_pitch;
6004
6005         byte_count = format.conv_byte_count;
6006         src_pitch = wined3d_surface_get_pitch(surface);
6007
6008         /* Stick to the alignment for the converted surface too, makes it
6009          * easier to load the surface. */
6010         dst_pitch = width * byte_count;
6011         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6012
6013         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6014         {
6015             ERR("Out of memory (%u).\n", dst_pitch * height);
6016             return E_OUTOFMEMORY;
6017         }
6018
6019         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
6020                 src_pitch, width, height, dst_pitch, convert, surface);
6021
6022         surface->flags |= SFLAG_CONVERTED;
6023     }
6024     else
6025     {
6026         surface->flags &= ~SFLAG_CONVERTED;
6027         mem = surface->resource.allocatedMemory;
6028         byte_count = format.byte_count;
6029     }
6030
6031     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
6032
6033     /* Don't delete PBO memory. */
6034     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6035         HeapFree(GetProcessHeap(), 0, mem);
6036
6037     return WINED3D_OK;
6038 }
6039
6040 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6041         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6042 {
6043     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
6044     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6045     struct wined3d_device *device = surface->resource.device;
6046     struct wined3d_context *context;
6047     UINT width, src_pitch, dst_pitch;
6048     struct wined3d_bo_address data;
6049     struct wined3d_format format;
6050     POINT dst_point = {0, 0};
6051     CONVERT_TYPES convert;
6052     BYTE *mem;
6053
6054     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6055             && surface_is_offscreen(surface)
6056             && (surface->flags & SFLAG_INDRAWABLE))
6057     {
6058         read_from_framebuffer_texture(surface, srgb);
6059
6060         return WINED3D_OK;
6061     }
6062
6063     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6064             && (surface->resource.format->flags & attach_flags) == attach_flags
6065             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6066                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6067                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6068     {
6069         if (srgb)
6070             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
6071                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6072         else
6073             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
6074                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6075
6076         return WINED3D_OK;
6077     }
6078
6079     /* Upload from system memory */
6080
6081     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6082             TRUE /* We will use textures */, &format, &convert);
6083
6084     if (srgb)
6085     {
6086         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6087         {
6088             /* Performance warning... */
6089             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6090             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6091         }
6092     }
6093     else
6094     {
6095         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6096         {
6097             /* Performance warning... */
6098             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6099             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6100         }
6101     }
6102
6103     if (!(surface->flags & SFLAG_INSYSMEM))
6104     {
6105         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6106         /* Lets hope we get it from somewhere... */
6107         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6108     }
6109
6110     /* TODO: Use already acquired context when possible. */
6111     context = context_acquire(device, NULL);
6112
6113     surface_prepare_texture(surface, context, srgb);
6114     surface_bind_and_dirtify(surface, context, srgb);
6115
6116     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6117     {
6118         surface->flags |= SFLAG_GLCKEY;
6119         surface->glCKey = surface->SrcBltCKey;
6120     }
6121     else surface->flags &= ~SFLAG_GLCKEY;
6122
6123     width = surface->resource.width;
6124     src_pitch = wined3d_surface_get_pitch(surface);
6125
6126     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6127      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6128      * called. */
6129     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6130     {
6131         TRACE("Removing the pbo attached to surface %p.\n", surface);
6132         surface_remove_pbo(surface, gl_info);
6133     }
6134
6135     if (format.convert)
6136     {
6137         /* This code is entered for texture formats which need a fixup. */
6138         UINT height = surface->resource.height;
6139
6140         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6141         dst_pitch = width * format.conv_byte_count;
6142         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6143
6144         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6145         {
6146             ERR("Out of memory (%u).\n", dst_pitch * height);
6147             context_release(context);
6148             return E_OUTOFMEMORY;
6149         }
6150         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6151     }
6152     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6153     {
6154         /* This code is only entered for color keying fixups */
6155         UINT height = surface->resource.height;
6156
6157         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6158         dst_pitch = width * format.conv_byte_count;
6159         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6160
6161         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6162         {
6163             ERR("Out of memory (%u).\n", dst_pitch * height);
6164             context_release(context);
6165             return E_OUTOFMEMORY;
6166         }
6167         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6168                 width, height, dst_pitch, convert, surface);
6169     }
6170     else
6171     {
6172         mem = surface->resource.allocatedMemory;
6173     }
6174
6175     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6176     data.addr = mem;
6177     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6178
6179     context_release(context);
6180
6181     /* Don't delete PBO memory. */
6182     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6183         HeapFree(GetProcessHeap(), 0, mem);
6184
6185     return WINED3D_OK;
6186 }
6187
6188 static void surface_multisample_resolve(struct wined3d_surface *surface)
6189 {
6190     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6191
6192     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6193         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6194
6195     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6196             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6197 }
6198
6199 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6200 {
6201     struct wined3d_device *device = surface->resource.device;
6202     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6203     HRESULT hr;
6204
6205     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6206
6207     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6208     {
6209         if (location == SFLAG_INTEXTURE)
6210         {
6211             struct wined3d_context *context = context_acquire(device, NULL);
6212             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6213             context_release(context);
6214             return WINED3D_OK;
6215         }
6216         else
6217         {
6218             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6219             return WINED3DERR_INVALIDCALL;
6220         }
6221     }
6222
6223     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6224         location = SFLAG_INTEXTURE;
6225
6226     if (surface->flags & location)
6227     {
6228         TRACE("Location already up to date.\n");
6229         return WINED3D_OK;
6230     }
6231
6232     if (WARN_ON(d3d_surface))
6233     {
6234         DWORD required_access = resource_access_from_location(location);
6235         if ((surface->resource.access_flags & required_access) != required_access)
6236             WARN("Operation requires %#x access, but surface only has %#x.\n",
6237                     required_access, surface->resource.access_flags);
6238     }
6239
6240     if (!(surface->flags & SFLAG_LOCATIONS))
6241     {
6242         ERR("Surface %p does not have any up to date location.\n", surface);
6243         surface->flags |= SFLAG_LOST;
6244         return WINED3DERR_DEVICELOST;
6245     }
6246
6247     switch (location)
6248     {
6249         case SFLAG_INSYSMEM:
6250             surface_load_sysmem(surface, gl_info, rect);
6251             break;
6252
6253         case SFLAG_INDRAWABLE:
6254             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6255                 return hr;
6256             break;
6257
6258         case SFLAG_INRB_RESOLVED:
6259             surface_multisample_resolve(surface);
6260             break;
6261
6262         case SFLAG_INTEXTURE:
6263         case SFLAG_INSRGBTEX:
6264             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6265                 return hr;
6266             break;
6267
6268         default:
6269             ERR("Don't know how to handle location %#x.\n", location);
6270             break;
6271     }
6272
6273     if (!rect)
6274     {
6275         surface->flags |= location;
6276
6277         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6278             surface_evict_sysmem(surface);
6279     }
6280
6281     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6282             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6283     {
6284         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6285     }
6286
6287     return WINED3D_OK;
6288 }
6289
6290 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6291 {
6292     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6293
6294     /* Not on a swapchain - must be offscreen */
6295     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6296
6297     /* The front buffer is always onscreen */
6298     if (surface == swapchain->front_buffer) return FALSE;
6299
6300     /* If the swapchain is rendered to an FBO, the backbuffer is
6301      * offscreen, otherwise onscreen */
6302     return swapchain->render_to_fbo;
6303 }
6304
6305 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); intentionally a no-op.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6308
6309 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6310 /* Context activation is done by the caller. */
6311 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6312 {
6313     BYTE table[256][4];
6314     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6315
6316     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6317
6318     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6319     ENTER_GL();
6320     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6321     LEAVE_GL();
6322 }
6323
6324 /* Context activation is done by the caller. */
6325 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6326 {
6327     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6328
6329     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6330      * else the surface is converted in software at upload time in LoadLocation.
6331      */
6332     if(fixup == COMPLEX_FIXUP_P8 && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6333         ffp_blit_p8_upload_palette(surface, context->gl_info);
6334
6335     ENTER_GL();
6336     glEnable(surface->texture_target);
6337     checkGLcall("glEnable(surface->texture_target)");
6338     LEAVE_GL();
6339     return WINED3D_OK;
6340 }
6341
6342 /* Context activation is done by the caller. */
6343 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6344 {
6345     ENTER_GL();
6346     glDisable(GL_TEXTURE_2D);
6347     checkGLcall("glDisable(GL_TEXTURE_2D)");
6348     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6349     {
6350         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6351         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6352     }
6353     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6354     {
6355         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6356         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6357     }
6358     LEAVE_GL();
6359 }
6360
6361 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6362         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6363         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6364 {
6365     enum complex_fixup src_fixup;
6366
6367     switch (blit_op)
6368     {
6369         case WINED3D_BLIT_OP_COLOR_BLIT:
6370             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6371                 return FALSE;
6372
6373             src_fixup = get_complex_fixup(src_format->color_fixup);
6374             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6375             {
6376                 TRACE("Checking support for fixup:\n");
6377                 dump_color_fixup_desc(src_format->color_fixup);
6378             }
6379
6380             if (!is_identity_fixup(dst_format->color_fixup))
6381             {
6382                 TRACE("Destination fixups are not supported\n");
6383                 return FALSE;
6384             }
6385
6386             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6387             {
6388                 TRACE("P8 fixup supported\n");
6389                 return TRUE;
6390             }
6391
6392             /* We only support identity conversions. */
6393             if (is_identity_fixup(src_format->color_fixup))
6394             {
6395                 TRACE("[OK]\n");
6396                 return TRUE;
6397             }
6398
6399             TRACE("[FAILED]\n");
6400             return FALSE;
6401
6402         case WINED3D_BLIT_OP_COLOR_FILL:
6403             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6404                 return FALSE;
6405
6406             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6407             {
6408                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6409                     return FALSE;
6410             }
6411             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6412             {
6413                 TRACE("Color fill not supported\n");
6414                 return FALSE;
6415             }
6416
6417             /* FIXME: We should reject color fills on formats with fixups,
6418              * but this would break P8 color fills for example. */
6419
6420             return TRUE;
6421
6422         case WINED3D_BLIT_OP_DEPTH_FILL:
6423             return TRUE;
6424
6425         default:
6426             TRACE("Unsupported blit_op=%d\n", blit_op);
6427             return FALSE;
6428     }
6429 }
6430
6431 /* Do not call while under the GL lock. */
6432 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6433         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6434 {
6435     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6436     struct wined3d_fb_state fb = {&dst_surface, NULL};
6437
6438     return device_clear_render_targets(device, 1, &fb,
6439             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6440 }
6441
6442 /* Do not call while under the GL lock. */
6443 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6444         struct wined3d_surface *surface, const RECT *rect, float depth)
6445 {
6446     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6447     struct wined3d_fb_state fb = {NULL, surface};
6448
6449     return device_clear_render_targets(device, 0, &fb,
6450             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6451 }
6452
6453 const struct blit_shader ffp_blit =  {
6454     ffp_blit_alloc,
6455     ffp_blit_free,
6456     ffp_blit_set,
6457     ffp_blit_unset,
6458     ffp_blit_supported,
6459     ffp_blit_color_fill,
6460     ffp_blit_depth_fill,
6461 };
6462
6463 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6464 {
6465     return WINED3D_OK;
6466 }
6467
/* Nothing was allocated by cpu_blit_alloc(), so nothing is released here.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6472
6473 /* Context activation is done by the caller. */
6474 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6475 {
6476     return WINED3D_OK;
6477 }
6478
/* Counterpart of cpu_blit_set(); there is no state to restore.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6483
6484 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6485         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6486         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6487 {
6488     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6489     {
6490         return TRUE;
6491     }
6492
6493     return FALSE;
6494 }
6495
6496 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6497         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6498         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6499 {
6500     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6501     const struct wined3d_format *src_format, *dst_format;
6502     struct wined3d_surface *orig_src = src_surface;
6503     WINED3DLOCKED_RECT dlock, slock;
6504     HRESULT hr = WINED3D_OK;
6505     const BYTE *sbuf;
6506     RECT xdst,xsrc;
6507     BYTE *dbuf;
6508     int x, y;
6509
6510     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6511             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6512             flags, fx, debug_d3dtexturefiltertype(filter));
6513
6514     xsrc = *src_rect;
6515
6516     if (!src_surface)
6517     {
6518         RECT full_rect;
6519
6520         full_rect.left = 0;
6521         full_rect.top = 0;
6522         full_rect.right = dst_surface->resource.width;
6523         full_rect.bottom = dst_surface->resource.height;
6524         IntersectRect(&xdst, &full_rect, dst_rect);
6525     }
6526     else
6527     {
6528         BOOL clip_horiz, clip_vert;
6529
6530         xdst = *dst_rect;
6531         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6532         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6533
6534         if (clip_vert || clip_horiz)
6535         {
6536             /* Now check if this is a special case or not... */
6537             if ((flags & WINEDDBLT_DDFX)
6538                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6539                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6540             {
6541                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6542                 return WINED3D_OK;
6543             }
6544
6545             if (clip_horiz)
6546             {
6547                 if (xdst.left < 0)
6548                 {
6549                     xsrc.left -= xdst.left;
6550                     xdst.left = 0;
6551                 }
6552                 if (xdst.right > dst_surface->resource.width)
6553                 {
6554                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6555                     xdst.right = (int)dst_surface->resource.width;
6556                 }
6557             }
6558
6559             if (clip_vert)
6560             {
6561                 if (xdst.top < 0)
6562                 {
6563                     xsrc.top -= xdst.top;
6564                     xdst.top = 0;
6565                 }
6566                 if (xdst.bottom > dst_surface->resource.height)
6567                 {
6568                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6569                     xdst.bottom = (int)dst_surface->resource.height;
6570                 }
6571             }
6572
6573             /* And check if after clipping something is still to be done... */
6574             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6575                     || (xdst.left >= (int)dst_surface->resource.width)
6576                     || (xdst.top >= (int)dst_surface->resource.height)
6577                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6578                     || (xsrc.left >= (int)src_surface->resource.width)
6579                     || (xsrc.top >= (int)src_surface->resource.height))
6580             {
6581                 TRACE("Nothing to be done after clipping.\n");
6582                 return WINED3D_OK;
6583             }
6584         }
6585     }
6586
6587     if (src_surface == dst_surface)
6588     {
6589         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6590         slock = dlock;
6591         src_format = dst_surface->resource.format;
6592         dst_format = src_format;
6593     }
6594     else
6595     {
6596         dst_format = dst_surface->resource.format;
6597         if (src_surface)
6598         {
6599             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6600             {
6601                 src_surface = surface_convert_format(src_surface, dst_format->id);
6602                 if (!src_surface)
6603                 {
6604                     /* The conv function writes a FIXME */
6605                     WARN("Cannot convert source surface format to dest format.\n");
6606                     goto release;
6607                 }
6608             }
6609             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6610             src_format = src_surface->resource.format;
6611         }
6612         else
6613         {
6614             src_format = dst_format;
6615         }
6616         if (dst_rect)
6617             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6618         else
6619             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6620     }
6621
6622     bpp = dst_surface->resource.format->byte_count;
6623     srcheight = xsrc.bottom - xsrc.top;
6624     srcwidth = xsrc.right - xsrc.left;
6625     dstheight = xdst.bottom - xdst.top;
6626     dstwidth = xdst.right - xdst.left;
6627     width = (xdst.right - xdst.left) * bpp;
6628
6629     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6630     {
6631         UINT row_block_count;
6632
6633         if (flags || src_surface == dst_surface)
6634         {
6635             FIXME("Only plain blits supported on compressed surfaces.\n");
6636             hr = E_NOTIMPL;
6637             goto release;
6638         }
6639
6640         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6641
6642         if (srcheight != dstheight || srcwidth != dstwidth)
6643         {
6644             WARN("Stretching not supported on compressed surfaces.\n");
6645             hr = WINED3DERR_INVALIDCALL;
6646             goto release;
6647         }
6648
6649         dbuf = dlock.pBits;
6650         sbuf = slock.pBits;
6651
6652         row_block_count = (dstwidth + dst_format->block_width - 1) / dst_format->block_width;
6653         for (y = 0; y < dstheight; y += dst_format->block_height)
6654         {
6655             memcpy(dbuf, sbuf, row_block_count * dst_format->block_byte_count);
6656             dbuf += dlock.Pitch;
6657             sbuf += slock.Pitch;
6658         }
6659
6660         goto release;
6661     }
6662
6663     if (dst_rect && src_surface != dst_surface)
6664         dbuf = dlock.pBits;
6665     else
6666         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6667
6668     /* First, all the 'source-less' blits */
6669     if (flags & WINEDDBLT_COLORFILL)
6670     {
6671         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6672         flags &= ~WINEDDBLT_COLORFILL;
6673     }
6674
6675     if (flags & WINEDDBLT_DEPTHFILL)
6676     {
6677         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6678     }
6679     if (flags & WINEDDBLT_ROP)
6680     {
6681         /* Catch some degenerate cases here. */
6682         switch (fx->dwROP)
6683         {
6684             case BLACKNESS:
6685                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6686                 break;
6687             case 0xAA0029: /* No-op */
6688                 break;
6689             case WHITENESS:
6690                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6691                 break;
6692             case SRCCOPY: /* Well, we do that below? */
6693                 break;
6694             default:
6695                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6696                 goto error;
6697         }
6698         flags &= ~WINEDDBLT_ROP;
6699     }
6700     if (flags & WINEDDBLT_DDROPS)
6701     {
6702         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6703     }
6704     /* Now the 'with source' blits. */
6705     if (src_surface)
6706     {
6707         const BYTE *sbase;
6708         int sx, xinc, sy, yinc;
6709
6710         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6711             goto release;
6712
6713         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6714                 && (srcwidth != dstwidth || srcheight != dstheight))
6715         {
6716             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6717             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6718         }
6719
6720         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6721         xinc = (srcwidth << 16) / dstwidth;
6722         yinc = (srcheight << 16) / dstheight;
6723
6724         if (!flags)
6725         {
6726             /* No effects, we can cheat here. */
6727             if (dstwidth == srcwidth)
6728             {
6729                 if (dstheight == srcheight)
6730                 {
6731                     /* No stretching in either direction. This needs to be as
6732                      * fast as possible. */
6733                     sbuf = sbase;
6734
6735                     /* Check for overlapping surfaces. */
6736                     if (src_surface != dst_surface || xdst.top < xsrc.top
6737                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6738                     {
6739                         /* No overlap, or dst above src, so copy from top downwards. */
6740                         for (y = 0; y < dstheight; ++y)
6741                         {
6742                             memcpy(dbuf, sbuf, width);
6743                             sbuf += slock.Pitch;
6744                             dbuf += dlock.Pitch;
6745                         }
6746                     }
6747                     else if (xdst.top > xsrc.top)
6748                     {
6749                         /* Copy from bottom upwards. */
6750                         sbuf += (slock.Pitch*dstheight);
6751                         dbuf += (dlock.Pitch*dstheight);
6752                         for (y = 0; y < dstheight; ++y)
6753                         {
6754                             sbuf -= slock.Pitch;
6755                             dbuf -= dlock.Pitch;
6756                             memcpy(dbuf, sbuf, width);
6757                         }
6758                     }
6759                     else
6760                     {
6761                         /* Src and dst overlapping on the same line, use memmove. */
6762                         for (y = 0; y < dstheight; ++y)
6763                         {
6764                             memmove(dbuf, sbuf, width);
6765                             sbuf += slock.Pitch;
6766                             dbuf += dlock.Pitch;
6767                         }
6768                     }
6769                 }
6770                 else
6771                 {
6772                     /* Stretching in y direction only. */
6773                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6774                     {
6775                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6776                         memcpy(dbuf, sbuf, width);
6777                         dbuf += dlock.Pitch;
6778                     }
6779                 }
6780             }
6781             else
6782             {
6783                 /* Stretching in X direction. */
6784                 int last_sy = -1;
6785                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6786                 {
6787                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6788
6789                     if ((sy >> 16) == (last_sy >> 16))
6790                     {
6791                         /* This source row is the same as last source row -
6792                          * Copy the already stretched row. */
6793                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6794                     }
6795                     else
6796                     {
6797 #define STRETCH_ROW(type) \
6798 do { \
6799     const type *s = (const type *)sbuf; \
6800     type *d = (type *)dbuf; \
6801     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6802         d[x] = s[sx >> 16]; \
6803 } while(0)
6804
6805                         switch(bpp)
6806                         {
6807                             case 1:
6808                                 STRETCH_ROW(BYTE);
6809                                 break;
6810                             case 2:
6811                                 STRETCH_ROW(WORD);
6812                                 break;
6813                             case 4:
6814                                 STRETCH_ROW(DWORD);
6815                                 break;
6816                             case 3:
6817                             {
6818                                 const BYTE *s;
6819                                 BYTE *d = dbuf;
6820                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6821                                 {
6822                                     DWORD pixel;
6823
6824                                     s = sbuf + 3 * (sx >> 16);
6825                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6826                                     d[0] = (pixel      ) & 0xff;
6827                                     d[1] = (pixel >>  8) & 0xff;
6828                                     d[2] = (pixel >> 16) & 0xff;
6829                                     d += 3;
6830                                 }
6831                                 break;
6832                             }
6833                             default:
6834                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6835                                 hr = WINED3DERR_NOTAVAILABLE;
6836                                 goto error;
6837                         }
6838 #undef STRETCH_ROW
6839                     }
6840                     dbuf += dlock.Pitch;
6841                     last_sy = sy;
6842                 }
6843             }
6844         }
6845         else
6846         {
6847             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6848             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6849             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6850             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6851             {
6852                 /* The color keying flags are checked for correctness in ddraw */
6853                 if (flags & WINEDDBLT_KEYSRC)
6854                 {
6855                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6856                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6857                 }
6858                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6859                 {
6860                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6861                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6862                 }
6863
6864                 if (flags & WINEDDBLT_KEYDEST)
6865                 {
6866                     /* Destination color keys are taken from the source surface! */
6867                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6868                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6869                 }
6870                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6871                 {
6872                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6873                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6874                 }
6875
6876                 if (bpp == 1)
6877                 {
6878                     keymask = 0xff;
6879                 }
6880                 else
6881                 {
6882                     keymask = src_format->red_mask
6883                             | src_format->green_mask
6884                             | src_format->blue_mask;
6885                 }
6886                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6887             }
6888
6889             if (flags & WINEDDBLT_DDFX)
6890             {
6891                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6892                 LONG tmpxy;
6893                 dTopLeft     = dbuf;
6894                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6895                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6896                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6897
6898                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6899                 {
6900                     /* I don't think we need to do anything about this flag */
6901                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6902                 }
6903                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6904                 {
6905                     tmp          = dTopRight;
6906                     dTopRight    = dTopLeft;
6907                     dTopLeft     = tmp;
6908                     tmp          = dBottomRight;
6909                     dBottomRight = dBottomLeft;
6910                     dBottomLeft  = tmp;
6911                     dstxinc = dstxinc * -1;
6912                 }
6913                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6914                 {
6915                     tmp          = dTopLeft;
6916                     dTopLeft     = dBottomLeft;
6917                     dBottomLeft  = tmp;
6918                     tmp          = dTopRight;
6919                     dTopRight    = dBottomRight;
6920                     dBottomRight = tmp;
6921                     dstyinc = dstyinc * -1;
6922                 }
6923                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6924                 {
6925                     /* I don't think we need to do anything about this flag */
6926                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6927                 }
6928                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6929                 {
6930                     tmp          = dBottomRight;
6931                     dBottomRight = dTopLeft;
6932                     dTopLeft     = tmp;
6933                     tmp          = dBottomLeft;
6934                     dBottomLeft  = dTopRight;
6935                     dTopRight    = tmp;
6936                     dstxinc = dstxinc * -1;
6937                     dstyinc = dstyinc * -1;
6938                 }
6939                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6940                 {
6941                     tmp          = dTopLeft;
6942                     dTopLeft     = dBottomLeft;
6943                     dBottomLeft  = dBottomRight;
6944                     dBottomRight = dTopRight;
6945                     dTopRight    = tmp;
6946                     tmpxy   = dstxinc;
6947                     dstxinc = dstyinc;
6948                     dstyinc = tmpxy;
6949                     dstxinc = dstxinc * -1;
6950                 }
6951                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6952                 {
6953                     tmp          = dTopLeft;
6954                     dTopLeft     = dTopRight;
6955                     dTopRight    = dBottomRight;
6956                     dBottomRight = dBottomLeft;
6957                     dBottomLeft  = tmp;
6958                     tmpxy   = dstxinc;
6959                     dstxinc = dstyinc;
6960                     dstyinc = tmpxy;
6961                     dstyinc = dstyinc * -1;
6962                 }
6963                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6964                 {
6965                     /* I don't think we need to do anything about this flag */
6966                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6967                 }
6968                 dbuf = dTopLeft;
6969                 flags &= ~(WINEDDBLT_DDFX);
6970             }
6971
6972 #define COPY_COLORKEY_FX(type) \
6973 do { \
6974     const type *s; \
6975     type *d = (type *)dbuf, *dx, tmp; \
6976     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6977     { \
6978         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6979         dx = d; \
6980         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6981         { \
6982             tmp = s[sx >> 16]; \
6983             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6984                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6985             { \
6986                 dx[0] = tmp; \
6987             } \
6988             dx = (type *)(((BYTE *)dx) + dstxinc); \
6989         } \
6990         d = (type *)(((BYTE *)d) + dstyinc); \
6991     } \
6992 } while(0)
6993
6994             switch (bpp)
6995             {
6996                 case 1:
6997                     COPY_COLORKEY_FX(BYTE);
6998                     break;
6999                 case 2:
7000                     COPY_COLORKEY_FX(WORD);
7001                     break;
7002                 case 4:
7003                     COPY_COLORKEY_FX(DWORD);
7004                     break;
7005                 case 3:
7006                 {
7007                     const BYTE *s;
7008                     BYTE *d = dbuf, *dx;
7009                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7010                     {
7011                         sbuf = sbase + (sy >> 16) * slock.Pitch;
7012                         dx = d;
7013                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7014                         {
7015                             DWORD pixel, dpixel = 0;
7016                             s = sbuf + 3 * (sx>>16);
7017                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7018                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7019                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7020                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7021                             {
7022                                 dx[0] = (pixel      ) & 0xff;
7023                                 dx[1] = (pixel >>  8) & 0xff;
7024                                 dx[2] = (pixel >> 16) & 0xff;
7025                             }
7026                             dx += dstxinc;
7027                         }
7028                         d += dstyinc;
7029                     }
7030                     break;
7031                 }
7032                 default:
7033                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7034                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7035                     hr = WINED3DERR_NOTAVAILABLE;
7036                     goto error;
7037 #undef COPY_COLORKEY_FX
7038             }
7039         }
7040     }
7041
7042 error:
7043     if (flags && FIXME_ON(d3d_surface))
7044     {
7045         FIXME("\tUnsupported flags: %#x.\n", flags);
7046     }
7047
7048 release:
7049     wined3d_surface_unmap(dst_surface);
7050     if (src_surface && src_surface != dst_surface)
7051         wined3d_surface_unmap(src_surface);
7052     /* Release the converted surface, if any. */
7053     if (src_surface && src_surface != orig_src)
7054         wined3d_surface_decref(src_surface);
7055
7056     return hr;
7057 }
7058
7059 /* Do not call while under the GL lock. */
7060 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7061         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7062 {
7063     static const RECT src_rect;
7064     WINEDDBLTFX BltFx;
7065
7066     memset(&BltFx, 0, sizeof(BltFx));
7067     BltFx.dwSize = sizeof(BltFx);
7068     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7069     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7070             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7071 }
7072
7073 /* Do not call while under the GL lock. */
7074 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7075         struct wined3d_surface *surface, const RECT *rect, float depth)
7076 {
7077     FIXME("Depth filling not implemented by cpu_blit.\n");
7078     return WINED3DERR_INVALIDCALL;
7079 }
7080
/* Function table of the CPU blitter. The initializers are positional, so
 * their order has to match the member order of struct blit_shader. */
const struct blit_shader cpu_blit =  {
    cpu_blit_alloc,
    cpu_blit_free,
    cpu_blit_set,
    cpu_blit_unset,
    cpu_blit_supported,
    cpu_blit_color_fill,    /* Colour fill through surface_cpu_blt(). */
    cpu_blit_depth_fill,    /* Stub; always fails with WINED3DERR_INVALIDCALL. */
};
7090
7091 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7092         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7093         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7094         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7095 {
7096     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7097     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7098     unsigned int resource_size;
7099     HRESULT hr;
7100
7101     if (multisample_quality > 0)
7102     {
7103         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7104         multisample_quality = 0;
7105     }
7106
7107     /* Quick lockable sanity check.
7108      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7109      * this function is too deep to need to care about things like this.
7110      * Levels need to be checked too, since they all affect what can be done. */
7111     switch (pool)
7112     {
7113         case WINED3DPOOL_SCRATCH:
7114             if (!lockable)
7115             {
7116                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7117                         "which are mutually exclusive, setting lockable to TRUE.\n");
7118                 lockable = TRUE;
7119             }
7120             break;
7121
7122         case WINED3DPOOL_SYSTEMMEM:
7123             if (!lockable)
7124                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7125             break;
7126
7127         case WINED3DPOOL_MANAGED:
7128             if (usage & WINED3DUSAGE_DYNAMIC)
7129                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7130             break;
7131
7132         case WINED3DPOOL_DEFAULT:
7133             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7134                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7135             break;
7136
7137         default:
7138             FIXME("Unknown pool %#x.\n", pool);
7139             break;
7140     };
7141
7142     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7143         FIXME("Trying to create a render target that isn't in the default pool.\n");
7144
7145     /* FIXME: Check that the format is supported by the device. */
7146
7147     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7148     if (!resource_size)
7149         return WINED3DERR_INVALIDCALL;
7150
7151     surface->surface_type = surface_type;
7152
7153     switch (surface_type)
7154     {
7155         case SURFACE_OPENGL:
7156             surface->surface_ops = &surface_ops;
7157             break;
7158
7159         case SURFACE_GDI:
7160             surface->surface_ops = &gdi_surface_ops;
7161             break;
7162
7163         default:
7164             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7165             return WINED3DERR_INVALIDCALL;
7166     }
7167
7168     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7169             multisample_type, multisample_quality, usage, pool, width, height, 1,
7170             resource_size, parent, parent_ops, &surface_resource_ops);
7171     if (FAILED(hr))
7172     {
7173         WARN("Failed to initialize resource, returning %#x.\n", hr);
7174         return hr;
7175     }
7176
7177     /* "Standalone" surface. */
7178     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7179
7180     surface->texture_level = level;
7181     list_init(&surface->overlays);
7182
7183     /* Flags */
7184     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7185     if (discard)
7186         surface->flags |= SFLAG_DISCARD;
7187     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7188         surface->flags |= SFLAG_LOCKABLE;
7189     /* I'm not sure if this qualifies as a hack or as an optimization. It
7190      * seems reasonable to assume that lockable render targets will get
7191      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7192      * creation. However, the other reason we want to do this is that several
7193      * ddraw applications access surface memory while the surface isn't
7194      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7195      * future locks prevents these from crashing. */
7196     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7197         surface->flags |= SFLAG_DYNLOCK;
7198
7199     /* Mark the texture as dirty so that it gets loaded first time around. */
7200     surface_add_dirty_rect(surface, NULL);
7201     list_init(&surface->renderbuffers);
7202
7203     TRACE("surface %p, memory %p, size %u\n",
7204             surface, surface->resource.allocatedMemory, surface->resource.size);
7205
7206     /* Call the private setup routine */
7207     hr = surface->surface_ops->surface_private_setup(surface);
7208     if (FAILED(hr))
7209     {
7210         ERR("Private setup failed, returning %#x\n", hr);
7211         surface->surface_ops->surface_cleanup(surface);
7212         return hr;
7213     }
7214
7215     return hr;
7216 }
7217
7218 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7219         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7220         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7221         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7222 {
7223     struct wined3d_surface *object;
7224     HRESULT hr;
7225
7226     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7227             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7228     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7229             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7230     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7231
7232     if (surface_type == SURFACE_OPENGL && !device->adapter)
7233     {
7234         ERR("OpenGL surfaces are not available without OpenGL.\n");
7235         return WINED3DERR_NOTAVAILABLE;
7236     }
7237
7238     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7239     if (!object)
7240     {
7241         ERR("Failed to allocate surface memory.\n");
7242         return WINED3DERR_OUTOFVIDEOMEMORY;
7243     }
7244
7245     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7246             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7247     if (FAILED(hr))
7248     {
7249         WARN("Failed to initialize surface, returning %#x.\n", hr);
7250         HeapFree(GetProcessHeap(), 0, object);
7251         return hr;
7252     }
7253
7254     TRACE("Created surface %p.\n", object);
7255     *surface = object;
7256
7257     return hr;
7258 }