wintrust: Add FindCertsByIssuer stub.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         /* Release the DC. */
101         SelectObject(surface->hDC, surface->dib.holdbitmap);
102         DeleteDC(surface->hDC);
103         /* Release the DIB section. */
104         DeleteObject(surface->dib.DIBsection);
105         surface->dib.bitmap_data = NULL;
106         surface->resource.allocatedMemory = NULL;
107     }
108
109     if (surface->flags & SFLAG_USERPTR)
110         wined3d_surface_set_mem(surface, NULL);
111     if (surface->overlay_dest)
112         list_remove(&surface->overlay_entry);
113
114     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
115     {
116         list_remove(&overlay->overlay_entry);
117         overlay->overlay_dest = NULL;
118     }
119
120     HeapFree(GetProcessHeap(), 0, surface->palette9);
121
122     resource_cleanup(&surface->resource);
123 }
124
125 void surface_update_draw_binding(struct wined3d_surface *surface)
126 {
127     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
128         surface->draw_binding = SFLAG_INDRAWABLE;
129     else if (surface->resource.multisample_type)
130         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
131     else
132         surface->draw_binding = SFLAG_INTEXTURE;
133 }
134
135 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
136 {
137     TRACE("surface %p, container %p.\n", surface, container);
138
139     if (!container && type != WINED3D_CONTAINER_NONE)
140         ERR("Setting NULL container of type %#x.\n", type);
141
142     if (type == WINED3D_CONTAINER_SWAPCHAIN)
143     {
144         surface->get_drawable_size = get_drawable_size_swapchain;
145     }
146     else
147     {
148         switch (wined3d_settings.offscreen_rendering_mode)
149         {
150             case ORM_FBO:
151                 surface->get_drawable_size = get_drawable_size_fbo;
152                 break;
153
154             case ORM_BACKBUFFER:
155                 surface->get_drawable_size = get_drawable_size_backbuffer;
156                 break;
157
158             default:
159                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
160                 return;
161         }
162     }
163
164     surface->container.type = type;
165     surface->container.u.base = container;
166     surface_update_draw_binding(surface);
167 }
168
169 struct blt_info
170 {
171     GLenum binding;
172     GLenum bind_target;
173     enum tex_types tex_type;
174     GLfloat coords[4][3];
175 };
176
/* Floating point rectangle: left, top, right and bottom edges. */
struct float_rect
{
    float l, t, r, b;
};
184
185 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
186 {
187     f->l = ((r->left * 2.0f) / w) - 1.0f;
188     f->t = ((r->top * 2.0f) / h) - 1.0f;
189     f->r = ((r->right * 2.0f) / w) - 1.0f;
190     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
191 }
192
193 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
194 {
195     GLfloat (*coords)[3] = info->coords;
196     struct float_rect f;
197
198     switch (target)
199     {
200         default:
201             FIXME("Unsupported texture target %#x\n", target);
202             /* Fall back to GL_TEXTURE_2D */
203         case GL_TEXTURE_2D:
204             info->binding = GL_TEXTURE_BINDING_2D;
205             info->bind_target = GL_TEXTURE_2D;
206             info->tex_type = tex_2d;
207             coords[0][0] = (float)rect->left / w;
208             coords[0][1] = (float)rect->top / h;
209             coords[0][2] = 0.0f;
210
211             coords[1][0] = (float)rect->right / w;
212             coords[1][1] = (float)rect->top / h;
213             coords[1][2] = 0.0f;
214
215             coords[2][0] = (float)rect->left / w;
216             coords[2][1] = (float)rect->bottom / h;
217             coords[2][2] = 0.0f;
218
219             coords[3][0] = (float)rect->right / w;
220             coords[3][1] = (float)rect->bottom / h;
221             coords[3][2] = 0.0f;
222             break;
223
224         case GL_TEXTURE_RECTANGLE_ARB:
225             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
226             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
227             info->tex_type = tex_rect;
228             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
229             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
230             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
231             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
232             break;
233
234         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
235             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
236             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
237             info->tex_type = tex_cube;
238             cube_coords_float(rect, w, h, &f);
239
240             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
241             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
242             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
243             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
244             break;
245
246         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
247             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
248             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
249             info->tex_type = tex_cube;
250             cube_coords_float(rect, w, h, &f);
251
252             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
253             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
254             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
255             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
256             break;
257
258         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
259             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
260             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
261             info->tex_type = tex_cube;
262             cube_coords_float(rect, w, h, &f);
263
264             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
265             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
266             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
267             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
268             break;
269
270         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
271             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
272             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
273             info->tex_type = tex_cube;
274             cube_coords_float(rect, w, h, &f);
275
276             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
277             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
278             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
279             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
280             break;
281
282         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
283             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
284             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
285             info->tex_type = tex_cube;
286             cube_coords_float(rect, w, h, &f);
287
288             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
289             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
290             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
291             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
292             break;
293
294         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
295             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
296             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
297             info->tex_type = tex_cube;
298             cube_coords_float(rect, w, h, &f);
299
300             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
301             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
302             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
303             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
304             break;
305     }
306 }
307
308 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
309 {
310     if (rect_in)
311         *rect_out = *rect_in;
312     else
313     {
314         rect_out->left = 0;
315         rect_out->top = 0;
316         rect_out->right = surface->resource.width;
317         rect_out->bottom = surface->resource.height;
318     }
319 }
320
321 /* GL locking and context activation is done by the caller */
322 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
323         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
324 {
325     struct blt_info info;
326
327     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
328
329     glEnable(info.bind_target);
330     checkGLcall("glEnable(bind_target)");
331
332     context_bind_texture(context, info.bind_target, src_surface->texture_name);
333
334     /* Filtering for StretchRect */
335     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
336             wined3d_gl_mag_filter(magLookup, Filter));
337     checkGLcall("glTexParameteri");
338     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
339             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
340     checkGLcall("glTexParameteri");
341     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
342     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
343     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
344         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
345     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
346     checkGLcall("glTexEnvi");
347
348     /* Draw a quad */
349     glBegin(GL_TRIANGLE_STRIP);
350     glTexCoord3fv(info.coords[0]);
351     glVertex2i(dst_rect->left, dst_rect->top);
352
353     glTexCoord3fv(info.coords[1]);
354     glVertex2i(dst_rect->right, dst_rect->top);
355
356     glTexCoord3fv(info.coords[2]);
357     glVertex2i(dst_rect->left, dst_rect->bottom);
358
359     glTexCoord3fv(info.coords[3]);
360     glVertex2i(dst_rect->right, dst_rect->bottom);
361     glEnd();
362
363     /* Unbind the texture */
364     context_bind_texture(context, info.bind_target, 0);
365
366     /* We changed the filtering settings on the texture. Inform the
367      * container about this to get the filters reset properly next draw. */
368     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
369     {
370         struct wined3d_texture *texture = src_surface->container.u.texture;
371         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
372         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
373         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
374         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
375     }
376 }
377
378 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
379 {
380     const struct wined3d_format *format = surface->resource.format;
381     SYSTEM_INFO sysInfo;
382     BITMAPINFO *b_info;
383     int extraline = 0;
384     DWORD *masks;
385     UINT usage;
386     HDC dc;
387
388     TRACE("surface %p.\n", surface);
389
390     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
391     {
392         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
393         return WINED3DERR_INVALIDCALL;
394     }
395
396     switch (format->byte_count)
397     {
398         case 2:
399         case 4:
400             /* Allocate extra space to store the RGB bit masks. */
401             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
402             break;
403
404         case 3:
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
406             break;
407
408         default:
409             /* Allocate extra space for a palette. */
410             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
411                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
412             break;
413     }
414
415     if (!b_info)
416         return E_OUTOFMEMORY;
417
418     /* Some applications access the surface in via DWORDs, and do not take
419      * the necessary care at the end of the surface. So we need at least
420      * 4 extra bytes at the end of the surface. Check against the page size,
421      * if the last page used for the surface has at least 4 spare bytes we're
422      * safe, otherwise add an extra line to the DIB section. */
423     GetSystemInfo(&sysInfo);
424     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
425     {
426         extraline = 1;
427         TRACE("Adding an extra line to the DIB section.\n");
428     }
429
430     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
431     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
432     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
433     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
434     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
435             * wined3d_surface_get_pitch(surface);
436     b_info->bmiHeader.biPlanes = 1;
437     b_info->bmiHeader.biBitCount = format->byte_count * 8;
438
439     b_info->bmiHeader.biXPelsPerMeter = 0;
440     b_info->bmiHeader.biYPelsPerMeter = 0;
441     b_info->bmiHeader.biClrUsed = 0;
442     b_info->bmiHeader.biClrImportant = 0;
443
444     /* Get the bit masks */
445     masks = (DWORD *)b_info->bmiColors;
446     switch (surface->resource.format->id)
447     {
448         case WINED3DFMT_B8G8R8_UNORM:
449             usage = DIB_RGB_COLORS;
450             b_info->bmiHeader.biCompression = BI_RGB;
451             break;
452
453         case WINED3DFMT_B5G5R5X1_UNORM:
454         case WINED3DFMT_B5G5R5A1_UNORM:
455         case WINED3DFMT_B4G4R4A4_UNORM:
456         case WINED3DFMT_B4G4R4X4_UNORM:
457         case WINED3DFMT_B2G3R3_UNORM:
458         case WINED3DFMT_B2G3R3A8_UNORM:
459         case WINED3DFMT_R10G10B10A2_UNORM:
460         case WINED3DFMT_R8G8B8A8_UNORM:
461         case WINED3DFMT_R8G8B8X8_UNORM:
462         case WINED3DFMT_B10G10R10A2_UNORM:
463         case WINED3DFMT_B5G6R5_UNORM:
464         case WINED3DFMT_R16G16B16A16_UNORM:
465             usage = 0;
466             b_info->bmiHeader.biCompression = BI_BITFIELDS;
467             masks[0] = format->red_mask;
468             masks[1] = format->green_mask;
469             masks[2] = format->blue_mask;
470             break;
471
472         default:
473             /* Don't know palette */
474             b_info->bmiHeader.biCompression = BI_RGB;
475             usage = 0;
476             break;
477     }
478
479     if (!(dc = GetDC(0)))
480     {
481         HeapFree(GetProcessHeap(), 0, b_info);
482         return HRESULT_FROM_WIN32(GetLastError());
483     }
484
485     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
486             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
487             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
488     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
489     ReleaseDC(0, dc);
490
491     if (!surface->dib.DIBsection)
492     {
493         ERR("Failed to create DIB section.\n");
494         HeapFree(GetProcessHeap(), 0, b_info);
495         return HRESULT_FROM_WIN32(GetLastError());
496     }
497
498     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
499     /* Copy the existing surface to the dib section. */
500     if (surface->resource.allocatedMemory)
501     {
502         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
503                 surface->resource.height * wined3d_surface_get_pitch(surface));
504     }
505     else
506     {
507         /* This is to make maps read the GL texture although memory is allocated. */
508         surface->flags &= ~SFLAG_INSYSMEM;
509     }
510     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
511
512     HeapFree(GetProcessHeap(), 0, b_info);
513
514     /* Now allocate a DC. */
515     surface->hDC = CreateCompatibleDC(0);
516     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
517     TRACE("Using wined3d palette %p.\n", surface->palette);
518     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
519
520     surface->flags |= SFLAG_DIBSECTION;
521
522     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
523     surface->resource.heapMemory = NULL;
524
525     return WINED3D_OK;
526 }
527
528 static void surface_prepare_system_memory(struct wined3d_surface *surface)
529 {
530     struct wined3d_device *device = surface->resource.device;
531     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
532
533     TRACE("surface %p.\n", surface);
534
535     /* Performance optimization: Count how often a surface is locked, if it is
536      * locked regularly do not throw away the system memory copy. This avoids
537      * the need to download the surface from OpenGL all the time. The surface
538      * is still downloaded if the OpenGL texture is changed. */
539     if (!(surface->flags & SFLAG_DYNLOCK))
540     {
541         if (++surface->lockCount > MAXLOCKCOUNT)
542         {
543             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
544             surface->flags |= SFLAG_DYNLOCK;
545         }
546     }
547
548     /* Create a PBO for dynamically locked surfaces but don't do it for
549      * converted or NPOT surfaces. Also don't create a PBO for systemmem
550      * surfaces. */
551     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
552             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
553             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
554     {
555         struct wined3d_context *context;
556         GLenum error;
557
558         context = context_acquire(device, NULL);
559         ENTER_GL();
560
561         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
562         error = glGetError();
563         if (!surface->pbo || error != GL_NO_ERROR)
564             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
565
566         TRACE("Binding PBO %u.\n", surface->pbo);
567
568         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
569         checkGLcall("glBindBufferARB");
570
571         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
572                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
573         checkGLcall("glBufferDataARB");
574
575         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
576         checkGLcall("glBindBufferARB");
577
578         /* We don't need the system memory anymore and we can't even use it for PBOs. */
579         if (!(surface->flags & SFLAG_CLIENT))
580         {
581             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
582             surface->resource.heapMemory = NULL;
583         }
584         surface->resource.allocatedMemory = NULL;
585         surface->flags |= SFLAG_PBO;
586         LEAVE_GL();
587         context_release(context);
588     }
589     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
590     {
591         /* Whatever surface we have, make sure that there is memory allocated
592          * for the downloaded copy, or a PBO to map. */
593         if (!surface->resource.heapMemory)
594             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
595
596         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
597                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
598
599         if (surface->flags & SFLAG_INSYSMEM)
600             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
601     }
602 }
603
604 static void surface_evict_sysmem(struct wined3d_surface *surface)
605 {
606     if (surface->flags & SFLAG_DONOTFREE)
607         return;
608
609     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
610     surface->resource.allocatedMemory = NULL;
611     surface->resource.heapMemory = NULL;
612     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
613 }
614
615 /* Context activation is done by the caller. */
616 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
617         struct wined3d_context *context, BOOL srgb)
618 {
619     struct wined3d_device *device = surface->resource.device;
620     DWORD active_sampler;
621
622     /* We don't need a specific texture unit, but after binding the texture
623      * the current unit is dirty. Read the unit back instead of switching to
624      * 0, this avoids messing around with the state manager's GL states. The
625      * current texture unit should always be a valid one.
626      *
627      * To be more specific, this is tricky because we can implicitly be
628      * called from sampler() in state.c. This means we can't touch anything
629      * other than whatever happens to be the currently active texture, or we
630      * would risk marking already applied sampler states dirty again. */
631     active_sampler = device->rev_tex_unit_map[context->active_texture];
632
633     if (active_sampler != WINED3D_UNMAPPED_STAGE)
634         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
635     surface_bind(surface, context, srgb);
636 }
637
638 static void surface_force_reload(struct wined3d_surface *surface)
639 {
640     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
641 }
642
643 static void surface_release_client_storage(struct wined3d_surface *surface)
644 {
645     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
646
647     ENTER_GL();
648     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
649     if (surface->texture_name)
650     {
651         surface_bind_and_dirtify(surface, context, FALSE);
652         glTexImage2D(surface->texture_target, surface->texture_level,
653                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
654     }
655     if (surface->texture_name_srgb)
656     {
657         surface_bind_and_dirtify(surface, context, TRUE);
658         glTexImage2D(surface->texture_target, surface->texture_level,
659                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
660     }
661     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
662     LEAVE_GL();
663
664     context_release(context);
665
666     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
667     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
668     surface_force_reload(surface);
669 }
670
671 static HRESULT surface_private_setup(struct wined3d_surface *surface)
672 {
673     /* TODO: Check against the maximum texture sizes supported by the video card. */
674     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
675     unsigned int pow2Width, pow2Height;
676
677     TRACE("surface %p.\n", surface);
678
679     surface->texture_name = 0;
680     surface->texture_target = GL_TEXTURE_2D;
681
682     /* Non-power2 support */
683     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
684     {
685         pow2Width = surface->resource.width;
686         pow2Height = surface->resource.height;
687     }
688     else
689     {
690         /* Find the nearest pow2 match */
691         pow2Width = pow2Height = 1;
692         while (pow2Width < surface->resource.width)
693             pow2Width <<= 1;
694         while (pow2Height < surface->resource.height)
695             pow2Height <<= 1;
696     }
697     surface->pow2Width = pow2Width;
698     surface->pow2Height = pow2Height;
699
700     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
701     {
702         /* TODO: Add support for non power two compressed textures. */
703         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
704         {
705             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
706                   surface, surface->resource.width, surface->resource.height);
707             return WINED3DERR_NOTAVAILABLE;
708         }
709     }
710
711     if (pow2Width != surface->resource.width
712             || pow2Height != surface->resource.height)
713     {
714         surface->flags |= SFLAG_NONPOW2;
715     }
716
717     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
718             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
719     {
720         /* One of three options:
721          * 1: Do the same as we do with NPOT and scale the texture, (any
722          *    texture ops would require the texture to be scaled which is
723          *    potentially slow)
724          * 2: Set the texture to the maximum size (bad idea).
725          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
726          * 4: Create the surface, but allow it to be used only for DirectDraw
727          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
728          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
729          *    the render target. */
730         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
731         {
732             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
733             return WINED3DERR_NOTAVAILABLE;
734         }
735
736         /* We should never use this surface in combination with OpenGL! */
737         TRACE("Creating an oversized surface: %ux%u.\n",
738                 surface->pow2Width, surface->pow2Height);
739     }
740     else
741     {
742         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
743          * and EXT_PALETTED_TEXTURE is used in combination with texture
744          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
745          * EXT_PALETTED_TEXTURE doesn't work in combination with
746          * ARB_TEXTURE_RECTANGLE. */
747         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
748                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
749                 && gl_info->supported[EXT_PALETTED_TEXTURE]
750                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
751         {
752             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
753             surface->pow2Width = surface->resource.width;
754             surface->pow2Height = surface->resource.height;
755             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
756         }
757     }
758
759     switch (wined3d_settings.offscreen_rendering_mode)
760     {
761         case ORM_FBO:
762             surface->get_drawable_size = get_drawable_size_fbo;
763             break;
764
765         case ORM_BACKBUFFER:
766             surface->get_drawable_size = get_drawable_size_backbuffer;
767             break;
768
769         default:
770             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
771             return WINED3DERR_INVALIDCALL;
772     }
773
774     surface->flags |= SFLAG_INSYSMEM;
775
776     return WINED3D_OK;
777 }
778
779 static void surface_realize_palette(struct wined3d_surface *surface)
780 {
781     struct wined3d_palette *palette = surface->palette;
782
783     TRACE("surface %p.\n", surface);
784
785     if (!palette) return;
786
787     if (surface->resource.format->id == WINED3DFMT_P8_UINT
788             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
789     {
790         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
791         {
792             /* Make sure the texture is up to date. This call doesn't do
793              * anything if the texture is already up to date. */
794             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
795
796             /* We want to force a palette refresh, so mark the drawable as not being up to date */
797             if (!surface_is_offscreen(surface))
798                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
799         }
800         else
801         {
802             if (!(surface->flags & SFLAG_INSYSMEM))
803             {
804                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
805                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
806             }
807             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
808         }
809     }
810
811     if (surface->flags & SFLAG_DIBSECTION)
812     {
813         RGBQUAD col[256];
814         unsigned int i;
815
816         TRACE("Updating the DC's palette.\n");
817
818         for (i = 0; i < 256; ++i)
819         {
820             col[i].rgbRed   = palette->palents[i].peRed;
821             col[i].rgbGreen = palette->palents[i].peGreen;
822             col[i].rgbBlue  = palette->palents[i].peBlue;
823             col[i].rgbReserved = 0;
824         }
825         SetDIBColorTable(surface->hDC, 0, 256, col);
826     }
827
828     /* Propagate the changes to the drawable when we have a palette. */
829     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
830         surface_load_location(surface, surface->draw_binding, NULL);
831 }
832
833 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
834 {
835     HRESULT hr;
836
837     /* If there's no destination surface there is nothing to do. */
838     if (!surface->overlay_dest)
839         return WINED3D_OK;
840
841     /* Blt calls ModifyLocation on the dest surface, which in turn calls
842      * DrawOverlay to update the overlay. Prevent an endless recursion. */
843     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
844         return WINED3D_OK;
845
846     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
847     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
848             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
849     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
850
851     return hr;
852 }
853
854 static void surface_preload(struct wined3d_surface *surface)
855 {
856     TRACE("surface %p.\n", surface);
857
858     surface_internal_preload(surface, SRGB_ANY);
859 }
860
861 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
862 {
863     struct wined3d_device *device = surface->resource.device;
864     const RECT *pass_rect = rect;
865
866     TRACE("surface %p, rect %s, flags %#x.\n",
867             surface, wine_dbgstr_rect(rect), flags);
868
869     if (flags & WINED3DLOCK_DISCARD)
870     {
871         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
872         surface_prepare_system_memory(surface);
873         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
874     }
875     else
876     {
877         /* surface_load_location() does not check if the rectangle specifies
878          * the full surface. Most callers don't need that, so do it here. */
879         if (rect && !rect->top && !rect->left
880                 && rect->right == surface->resource.width
881                 && rect->bottom == surface->resource.height)
882             pass_rect = NULL;
883
884         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
885                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
886                 || surface == device->fb.render_targets[0])))
887             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
888     }
889
890     if (surface->flags & SFLAG_PBO)
891     {
892         const struct wined3d_gl_info *gl_info;
893         struct wined3d_context *context;
894
895         context = context_acquire(device, NULL);
896         gl_info = context->gl_info;
897
898         ENTER_GL();
899         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
900         checkGLcall("glBindBufferARB");
901
902         /* This shouldn't happen but could occur if some other function
903          * didn't handle the PBO properly. */
904         if (surface->resource.allocatedMemory)
905             ERR("The surface already has PBO memory allocated.\n");
906
907         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
908         checkGLcall("glMapBufferARB");
909
910         /* Make sure the PBO isn't set anymore in order not to break non-PBO
911          * calls. */
912         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
913         checkGLcall("glBindBufferARB");
914
915         LEAVE_GL();
916         context_release(context);
917     }
918
919     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
920     {
921         if (!rect)
922             surface_add_dirty_rect(surface, NULL);
923         else
924         {
925             WINED3DBOX b;
926
927             b.Left = rect->left;
928             b.Top = rect->top;
929             b.Right = rect->right;
930             b.Bottom = rect->bottom;
931             b.Front = 0;
932             b.Back = 1;
933             surface_add_dirty_rect(surface, &b);
934         }
935     }
936 }
937
938 static void surface_unmap(struct wined3d_surface *surface)
939 {
940     struct wined3d_device *device = surface->resource.device;
941     BOOL fullsurface;
942
943     TRACE("surface %p.\n", surface);
944
945     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
946
947     if (surface->flags & SFLAG_PBO)
948     {
949         const struct wined3d_gl_info *gl_info;
950         struct wined3d_context *context;
951
952         TRACE("Freeing PBO memory.\n");
953
954         context = context_acquire(device, NULL);
955         gl_info = context->gl_info;
956
957         ENTER_GL();
958         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
959         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
960         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
961         checkGLcall("glUnmapBufferARB");
962         LEAVE_GL();
963         context_release(context);
964
965         surface->resource.allocatedMemory = NULL;
966     }
967
968     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
969
970     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
971     {
972         TRACE("Not dirtified, nothing to do.\n");
973         goto done;
974     }
975
976     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
977             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
978     {
979         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
980         {
981             static BOOL warned = FALSE;
982             if (!warned)
983             {
984                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
985                 warned = TRUE;
986             }
987             goto done;
988         }
989
990         if (!surface->dirtyRect.left && !surface->dirtyRect.top
991                 && surface->dirtyRect.right == surface->resource.width
992                 && surface->dirtyRect.bottom == surface->resource.height)
993         {
994             fullsurface = TRUE;
995         }
996         else
997         {
998             /* TODO: Proper partial rectangle tracking. */
999             fullsurface = FALSE;
1000             surface->flags |= SFLAG_INSYSMEM;
1001         }
1002
1003         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
1004
1005         /* Partial rectangle tracking is not commonly implemented, it is only
1006          * done for render targets. INSYSMEM was set before to tell
1007          * surface_load_location() where to read the rectangle from.
1008          * Indrawable is set because all modifications from the partial
1009          * sysmem copy are written back to the drawable, thus the surface is
1010          * merged again in the drawable. The sysmem copy is not fully up to
1011          * date because only a subrectangle was read in Map(). */
1012         if (!fullsurface)
1013         {
1014             surface_modify_location(surface, surface->draw_binding, TRUE);
1015             surface_evict_sysmem(surface);
1016         }
1017
1018         surface->dirtyRect.left = surface->resource.width;
1019         surface->dirtyRect.top = surface->resource.height;
1020         surface->dirtyRect.right = 0;
1021         surface->dirtyRect.bottom = 0;
1022     }
1023     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1024     {
1025         FIXME("Depth / stencil buffer locking is not implemented.\n");
1026     }
1027
1028 done:
1029     /* Overlays have to be redrawn manually after changes with the GL implementation */
1030     if (surface->overlay_dest)
1031         surface->surface_ops->surface_draw_overlay(surface);
1032 }
1033
1034 static HRESULT surface_getdc(struct wined3d_surface *surface)
1035 {
1036     WINED3DLOCKED_RECT lock;
1037     HRESULT hr;
1038
1039     TRACE("surface %p.\n", surface);
1040
1041     /* Create a DIB section if there isn't a dc yet. */
1042     if (!surface->hDC)
1043     {
1044         if (surface->flags & SFLAG_CLIENT)
1045         {
1046             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1047             surface_release_client_storage(surface);
1048         }
1049         hr = surface_create_dib_section(surface);
1050         if (FAILED(hr))
1051             return WINED3DERR_INVALIDCALL;
1052
1053         /* Use the DIB section from now on if we are not using a PBO. */
1054         if (!(surface->flags & SFLAG_PBO))
1055             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1056     }
1057
1058     /* Map the surface. */
1059     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1060     if (FAILED(hr))
1061         ERR("Map failed, hr %#x.\n", hr);
1062
1063     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1064      * activates the allocatedMemory. */
1065     if (surface->flags & SFLAG_PBO)
1066         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
1067
1068     return hr;
1069 }
1070
1071 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1072 {
1073     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1074         return FALSE;
1075     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1076         return FALSE;
1077     return TRUE;
1078 }
1079
1080 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1081         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1082 {
1083     const struct wined3d_gl_info *gl_info;
1084     struct wined3d_context *context;
1085     DWORD src_mask, dst_mask;
1086     GLbitfield gl_mask;
1087
1088     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1089             device, src_surface, wine_dbgstr_rect(src_rect),
1090             dst_surface, wine_dbgstr_rect(dst_rect));
1091
1092     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1093     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1094
1095     if (src_mask != dst_mask)
1096     {
1097         ERR("Incompatible formats %s and %s.\n",
1098                 debug_d3dformat(src_surface->resource.format->id),
1099                 debug_d3dformat(dst_surface->resource.format->id));
1100         return;
1101     }
1102
1103     if (!src_mask)
1104     {
1105         ERR("Not a depth / stencil format: %s.\n",
1106                 debug_d3dformat(src_surface->resource.format->id));
1107         return;
1108     }
1109
1110     gl_mask = 0;
1111     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1112         gl_mask |= GL_DEPTH_BUFFER_BIT;
1113     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1114         gl_mask |= GL_STENCIL_BUFFER_BIT;
1115
1116     /* Make sure the locations are up-to-date. Loading the destination
1117      * surface isn't required if the entire surface is overwritten. */
1118     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1119     if (!surface_is_full_rect(dst_surface, dst_rect))
1120         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1121
1122     context = context_acquire(device, NULL);
1123     if (!context->valid)
1124     {
1125         context_release(context);
1126         WARN("Invalid context, skipping blit.\n");
1127         return;
1128     }
1129
1130     gl_info = context->gl_info;
1131
1132     ENTER_GL();
1133
1134     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1135     glReadBuffer(GL_NONE);
1136     checkGLcall("glReadBuffer()");
1137     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1138
1139     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1140     context_set_draw_buffer(context, GL_NONE);
1141     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1142
1143     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1144     {
1145         glDepthMask(GL_TRUE);
1146         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1147     }
1148     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1149     {
1150         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1151         {
1152             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1153             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1154         }
1155         glStencilMask(~0U);
1156         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1157     }
1158
1159     glDisable(GL_SCISSOR_TEST);
1160     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1161
1162     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1163             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1164     checkGLcall("glBlitFramebuffer()");
1165
1166     LEAVE_GL();
1167
1168     if (wined3d_settings.strict_draw_ordering)
1169         wglFlush(); /* Flush to ensure ordering across contexts. */
1170
1171     context_release(context);
1172 }
1173
1174 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1175  * Depth / stencil is not supported. */
1176 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1177         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1178         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1179 {
1180     const struct wined3d_gl_info *gl_info;
1181     struct wined3d_context *context;
1182     RECT src_rect, dst_rect;
1183     GLenum gl_filter;
1184     GLenum buffer;
1185
1186     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1187     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1188             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1189     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1190             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1191
1192     src_rect = *src_rect_in;
1193     dst_rect = *dst_rect_in;
1194
1195     switch (filter)
1196     {
1197         case WINED3DTEXF_LINEAR:
1198             gl_filter = GL_LINEAR;
1199             break;
1200
1201         default:
1202             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1203         case WINED3DTEXF_NONE:
1204         case WINED3DTEXF_POINT:
1205             gl_filter = GL_NEAREST;
1206             break;
1207     }
1208
1209     /* Resolve the source surface first if needed. */
1210     if (src_location == SFLAG_INRB_MULTISAMPLE
1211             && (src_surface->resource.format->id != dst_surface->resource.format->id
1212                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1213                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1214         src_location = SFLAG_INRB_RESOLVED;
1215
1216     /* Make sure the locations are up-to-date. Loading the destination
1217      * surface isn't required if the entire surface is overwritten. (And is
1218      * in fact harmful if we're being called by surface_load_location() with
1219      * the purpose of loading the destination surface.) */
1220     surface_load_location(src_surface, src_location, NULL);
1221     if (!surface_is_full_rect(dst_surface, &dst_rect))
1222         surface_load_location(dst_surface, dst_location, NULL);
1223
1224     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1225     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1226     else context = context_acquire(device, NULL);
1227
1228     if (!context->valid)
1229     {
1230         context_release(context);
1231         WARN("Invalid context, skipping blit.\n");
1232         return;
1233     }
1234
1235     gl_info = context->gl_info;
1236
1237     if (src_location == SFLAG_INDRAWABLE)
1238     {
1239         TRACE("Source surface %p is onscreen.\n", src_surface);
1240         buffer = surface_get_gl_buffer(src_surface);
1241         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1242     }
1243     else
1244     {
1245         TRACE("Source surface %p is offscreen.\n", src_surface);
1246         buffer = GL_COLOR_ATTACHMENT0;
1247     }
1248
1249     ENTER_GL();
1250     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1251     glReadBuffer(buffer);
1252     checkGLcall("glReadBuffer()");
1253     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1254     LEAVE_GL();
1255
1256     if (dst_location == SFLAG_INDRAWABLE)
1257     {
1258         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1259         buffer = surface_get_gl_buffer(dst_surface);
1260         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1261     }
1262     else
1263     {
1264         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1265         buffer = GL_COLOR_ATTACHMENT0;
1266     }
1267
1268     ENTER_GL();
1269     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1270     context_set_draw_buffer(context, buffer);
1271     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1272     context_invalidate_state(context, STATE_FRAMEBUFFER);
1273
1274     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1275     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1276     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1277     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1278     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1279
1280     glDisable(GL_SCISSOR_TEST);
1281     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1282
1283     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1284             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1285     checkGLcall("glBlitFramebuffer()");
1286
1287     LEAVE_GL();
1288
1289     if (wined3d_settings.strict_draw_ordering
1290             || (dst_location == SFLAG_INDRAWABLE
1291             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1292         wglFlush();
1293
1294     context_release(context);
1295 }
1296
1297 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1298         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1299         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1300 {
1301     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1302         return FALSE;
1303
1304     /* Source and/or destination need to be on the GL side */
1305     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1306         return FALSE;
1307
1308     switch (blit_op)
1309     {
1310         case WINED3D_BLIT_OP_COLOR_BLIT:
1311             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1312                 return FALSE;
1313             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1314                 return FALSE;
1315             break;
1316
1317         case WINED3D_BLIT_OP_DEPTH_BLIT:
1318             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1319                 return FALSE;
1320             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1321                 return FALSE;
1322             break;
1323
1324         default:
1325             return FALSE;
1326     }
1327
1328     if (!(src_format->id == dst_format->id
1329             || (is_identity_fixup(src_format->color_fixup)
1330             && is_identity_fixup(dst_format->color_fixup))))
1331         return FALSE;
1332
1333     return TRUE;
1334 }
1335
1336 /* This function checks if the primary render target uses the 8bit paletted format. */
1337 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1338 {
1339     if (device->fb.render_targets && device->fb.render_targets[0])
1340     {
1341         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1342         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1343                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1344             return TRUE;
1345     }
1346     return FALSE;
1347 }
1348
1349 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1350         DWORD color, WINED3DCOLORVALUE *float_color)
1351 {
1352     const struct wined3d_format *format = surface->resource.format;
1353     const struct wined3d_device *device = surface->resource.device;
1354
1355     switch (format->id)
1356     {
1357         case WINED3DFMT_P8_UINT:
1358             if (surface->palette)
1359             {
1360                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1361                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1362                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1363             }
1364             else
1365             {
1366                 float_color->r = 0.0f;
1367                 float_color->g = 0.0f;
1368                 float_color->b = 0.0f;
1369             }
1370             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1371             break;
1372
1373         case WINED3DFMT_B5G6R5_UNORM:
1374             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1375             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1376             float_color->b = (color & 0x1f) / 31.0f;
1377             float_color->a = 1.0f;
1378             break;
1379
1380         case WINED3DFMT_B8G8R8_UNORM:
1381         case WINED3DFMT_B8G8R8X8_UNORM:
1382             float_color->r = D3DCOLOR_R(color);
1383             float_color->g = D3DCOLOR_G(color);
1384             float_color->b = D3DCOLOR_B(color);
1385             float_color->a = 1.0f;
1386             break;
1387
1388         case WINED3DFMT_B8G8R8A8_UNORM:
1389             float_color->r = D3DCOLOR_R(color);
1390             float_color->g = D3DCOLOR_G(color);
1391             float_color->b = D3DCOLOR_B(color);
1392             float_color->a = D3DCOLOR_A(color);
1393             break;
1394
1395         default:
1396             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1397             return FALSE;
1398     }
1399
1400     return TRUE;
1401 }
1402
1403 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1404 {
1405     const struct wined3d_format *format = surface->resource.format;
1406
1407     switch (format->id)
1408     {
1409         case WINED3DFMT_S1_UINT_D15_UNORM:
1410             *float_depth = depth / (float)0x00007fff;
1411             break;
1412
1413         case WINED3DFMT_D16_UNORM:
1414             *float_depth = depth / (float)0x0000ffff;
1415             break;
1416
1417         case WINED3DFMT_D24_UNORM_S8_UINT:
1418         case WINED3DFMT_X8D24_UNORM:
1419             *float_depth = depth / (float)0x00ffffff;
1420             break;
1421
1422         case WINED3DFMT_D32_UNORM:
1423             *float_depth = depth / (float)0xffffffff;
1424             break;
1425
1426         default:
1427             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1428             return FALSE;
1429     }
1430
1431     return TRUE;
1432 }
1433
1434 /* Do not call while under the GL lock. */
1435 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1436 {
1437     const struct wined3d_resource *resource = &surface->resource;
1438     struct wined3d_device *device = resource->device;
1439     const struct blit_shader *blitter;
1440
1441     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1442             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1443     if (!blitter)
1444     {
1445         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1446         return WINED3DERR_INVALIDCALL;
1447     }
1448
1449     return blitter->depth_fill(device, surface, rect, depth);
1450 }
1451
1452 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1453         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1454 {
1455     struct wined3d_device *device = src_surface->resource.device;
1456
1457     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1458             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1459             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1460         return WINED3DERR_INVALIDCALL;
1461
1462     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1463
1464     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1465             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1466     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1467
1468     return WINED3D_OK;
1469 }
1470
1471 /* Do not call while under the GL lock. */
1472 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1473         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1474         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1475 {
1476     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1477     struct wined3d_device *device = dst_surface->resource.device;
1478     DWORD src_ds_flags, dst_ds_flags;
1479     RECT src_rect, dst_rect;
1480     BOOL scale, convert;
1481
1482     static const DWORD simple_blit = WINEDDBLT_ASYNC
1483             | WINEDDBLT_COLORFILL
1484             | WINEDDBLT_WAIT
1485             | WINEDDBLT_DEPTHFILL
1486             | WINEDDBLT_DONOTWAIT;
1487
1488     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1489             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1490             flags, fx, debug_d3dtexturefiltertype(filter));
1491     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1492
1493     if (fx)
1494     {
1495         TRACE("dwSize %#x.\n", fx->dwSize);
1496         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1497         TRACE("dwROP %#x.\n", fx->dwROP);
1498         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1499         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1500         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1501         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1502         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1503         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1504         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1505         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1506         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1507         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1508         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1509         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1510         TRACE("dwReserved %#x.\n", fx->dwReserved);
1511         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1512         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1513         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1514         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1515         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1516         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1517                 fx->ddckDestColorkey.dwColorSpaceLowValue,
1518                 fx->ddckDestColorkey.dwColorSpaceHighValue);
1519         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1520                 fx->ddckSrcColorkey.dwColorSpaceLowValue,
1521                 fx->ddckSrcColorkey.dwColorSpaceHighValue);
1522     }
1523
1524     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1525     {
1526         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1527         return WINEDDERR_SURFACEBUSY;
1528     }
1529
1530     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1531
1532     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1533             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1534             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1535             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1536             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1537     {
1538         /* The destination rect can be out of bounds on the condition
1539          * that a clipper is set for the surface. */
1540         if (dst_surface->clipper)
1541             FIXME("Blit clipping not implemented.\n");
1542         else
1543             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1544         return WINEDDERR_INVALIDRECT;
1545     }
1546
1547     if (src_surface)
1548     {
1549         surface_get_rect(src_surface, src_rect_in, &src_rect);
1550
1551         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1552                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1553                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1554                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1555                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1556         {
1557             WARN("Application gave us bad source rectangle for Blt.\n");
1558             return WINEDDERR_INVALIDRECT;
1559         }
1560     }
1561     else
1562     {
1563         memset(&src_rect, 0, sizeof(src_rect));
1564     }
1565
1566     if (!fx || !(fx->dwDDFX))
1567         flags &= ~WINEDDBLT_DDFX;
1568
1569     if (flags & WINEDDBLT_WAIT)
1570         flags &= ~WINEDDBLT_WAIT;
1571
1572     if (flags & WINEDDBLT_ASYNC)
1573     {
1574         static unsigned int once;
1575
1576         if (!once++)
1577             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1578         flags &= ~WINEDDBLT_ASYNC;
1579     }
1580
1581     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1582     if (flags & WINEDDBLT_DONOTWAIT)
1583     {
1584         static unsigned int once;
1585
1586         if (!once++)
1587             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1588         flags &= ~WINEDDBLT_DONOTWAIT;
1589     }
1590
1591     if (!device->d3d_initialized)
1592     {
1593         WARN("D3D not initialized, using fallback.\n");
1594         goto cpu;
1595     }
1596
1597     /* We want to avoid invalidating the sysmem location for converted
1598      * surfaces, since otherwise we'd have to convert the data back when
1599      * locking them. */
1600     if (dst_surface->flags & SFLAG_CONVERTED)
1601     {
1602         WARN("Converted surface, using CPU blit.\n");
1603         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1604     }
1605
1606     if (flags & ~simple_blit)
1607     {
1608         WARN("Using fallback for complex blit (%#x).\n", flags);
1609         goto fallback;
1610     }
1611
1612     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1613         src_swapchain = src_surface->container.u.swapchain;
1614     else
1615         src_swapchain = NULL;
1616
1617     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1618         dst_swapchain = dst_surface->container.u.swapchain;
1619     else
1620         dst_swapchain = NULL;
1621
1622     /* This isn't strictly needed. FBO blits for example could deal with
1623      * cross-swapchain blits by first downloading the source to a texture
1624      * before switching to the destination context. We just have this here to
1625      * not have to deal with the issue, since cross-swapchain blits should be
1626      * rare. */
1627     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1628     {
1629         FIXME("Using fallback for cross-swapchain blit.\n");
1630         goto fallback;
1631     }
1632
1633     scale = src_surface
1634             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1635             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1636     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1637
1638     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1639     if (src_surface)
1640         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1641     else
1642         src_ds_flags = 0;
1643
1644     if (src_ds_flags || dst_ds_flags)
1645     {
1646         if (flags & WINEDDBLT_DEPTHFILL)
1647         {
1648             float depth;
1649
1650             TRACE("Depth fill.\n");
1651
1652             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1653                 return WINED3DERR_INVALIDCALL;
1654
1655             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1656                 return WINED3D_OK;
1657         }
1658         else
1659         {
1660             /* Accessing depth / stencil surfaces is supposed to fail while in
1661              * a scene, except for fills, which seem to work. */
1662             if (device->inScene)
1663             {
1664                 WARN("Rejecting depth / stencil access while in scene.\n");
1665                 return WINED3DERR_INVALIDCALL;
1666             }
1667
1668             if (src_ds_flags != dst_ds_flags)
1669             {
1670                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1671                 return WINED3DERR_INVALIDCALL;
1672             }
1673
1674             if (src_rect.top || src_rect.left
1675                     || src_rect.bottom != src_surface->resource.height
1676                     || src_rect.right != src_surface->resource.width)
1677             {
1678                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1679                         wine_dbgstr_rect(&src_rect));
1680                 return WINED3DERR_INVALIDCALL;
1681             }
1682
1683             if (dst_rect.top || dst_rect.left
1684                     || dst_rect.bottom != dst_surface->resource.height
1685                     || dst_rect.right != dst_surface->resource.width)
1686             {
1687                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1688                         wine_dbgstr_rect(&src_rect));
1689                 return WINED3DERR_INVALIDCALL;
1690             }
1691
1692             if (scale)
1693             {
1694                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1695                 return WINED3DERR_INVALIDCALL;
1696             }
1697
1698             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1699                 return WINED3D_OK;
1700         }
1701     }
1702     else
1703     {
1704         /* In principle this would apply to depth blits as well, but we don't
1705          * implement those in the CPU blitter at the moment. */
1706         if ((dst_surface->flags & SFLAG_INSYSMEM)
1707                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1708         {
1709             if (scale)
1710                 TRACE("Not doing sysmem blit because of scaling.\n");
1711             else if (convert)
1712                 TRACE("Not doing sysmem blit because of format conversion.\n");
1713             else
1714                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1715         }
1716
1717         if (flags & WINEDDBLT_COLORFILL)
1718         {
1719             WINED3DCOLORVALUE color;
1720
1721             TRACE("Color fill.\n");
1722
1723             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1724                 goto fallback;
1725
1726             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1727                 return WINED3D_OK;
1728         }
1729         else
1730         {
1731             TRACE("Color blit.\n");
1732
1733             /* Use present for back -> front blits. The idea behind this is
1734              * that present is potentially faster than a blit, in particular
1735              * when FBO blits aren't available. Some ddraw applications like
1736              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1737              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1738              * applications can't blit directly to the frontbuffer. */
1739             if (dst_swapchain && dst_swapchain->back_buffers
1740                     && dst_surface == dst_swapchain->front_buffer
1741                     && src_surface == dst_swapchain->back_buffers[0])
1742             {
1743                 WINED3DSWAPEFFECT swap_effect = dst_swapchain->presentParms.SwapEffect;
1744
1745                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1746
1747                 /* Set the swap effect to COPY, we don't want the backbuffer
1748                  * to become undefined. */
1749                 dst_swapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
1750                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1751                 dst_swapchain->presentParms.SwapEffect = swap_effect;
1752
1753                 return WINED3D_OK;
1754             }
1755
1756             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1757                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1758                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1759             {
1760                 TRACE("Using FBO blit.\n");
1761
1762                 surface_blt_fbo(device, filter,
1763                         src_surface, src_surface->draw_binding, &src_rect,
1764                         dst_surface, dst_surface->draw_binding, &dst_rect);
1765                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1766                 return WINED3D_OK;
1767             }
1768
1769             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1770                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1771                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1772             {
1773                 TRACE("Using arbfp blit.\n");
1774
1775                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1776                     return WINED3D_OK;
1777             }
1778         }
1779     }
1780
1781 fallback:
1782
1783     /* Special cases for render targets. */
1784     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1785             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1786     {
1787         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1788                 src_surface, &src_rect, flags, fx, filter)))
1789             return WINED3D_OK;
1790     }
1791
1792 cpu:
1793
1794     /* For the rest call the X11 surface implementation. For render targets
1795      * this should be implemented OpenGL accelerated in BltOverride, other
1796      * blits are rather rare. */
1797     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1798 }
1799
1800 /* Do not call while under the GL lock. */
1801 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1802         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1803 {
1804     RECT src_rect, dst_rect;
1805     DWORD flags = 0;
1806
1807     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect_in %s, trans %#x.\n",
1808             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1809
1810     surface_get_rect(src_surface, src_rect_in, &src_rect);
1811
1812     dst_rect.left = dst_x;
1813     dst_rect.top = dst_y;
1814     dst_rect.right = dst_x + src_rect.right - src_rect.left;
1815     dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1816
1817     if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1818         flags |= WINEDDBLT_KEYSRC;
1819     if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1820         flags |= WINEDDBLT_KEYDEST;
1821     if (trans & WINEDDBLTFAST_WAIT)
1822         flags |= WINEDDBLT_WAIT;
1823     if (trans & WINEDDBLTFAST_DONOTWAIT)
1824         flags |= WINEDDBLT_DONOTWAIT;
1825
1826     return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT);
1827 }
1828
1829 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1830         struct wined3d_surface *render_target)
1831 {
1832     TRACE("surface %p, render_target %p.\n", surface, render_target);
1833
1834     /* TODO: Check surface sizes, pools, etc. */
1835
1836     if (render_target->resource.multisample_type)
1837         return WINED3DERR_INVALIDCALL;
1838
1839     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3DTEXF_POINT);
1840 }
1841
1842 /* Context activation is done by the caller. */
1843 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1844 {
1845     if (!surface->resource.heapMemory)
1846     {
1847         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1848         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1849                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1850     }
1851
1852     ENTER_GL();
1853     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1854     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1855     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1856             surface->resource.size, surface->resource.allocatedMemory));
1857     checkGLcall("glGetBufferSubDataARB");
1858     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1859     checkGLcall("glDeleteBuffersARB");
1860     LEAVE_GL();
1861
1862     surface->pbo = 0;
1863     surface->flags &= ~SFLAG_PBO;
1864 }
1865
1866 /* Do not call while under the GL lock. */
1867 static void surface_unload(struct wined3d_resource *resource)
1868 {
1869     struct wined3d_surface *surface = surface_from_resource(resource);
1870     struct wined3d_renderbuffer_entry *entry, *entry2;
1871     struct wined3d_device *device = resource->device;
1872     const struct wined3d_gl_info *gl_info;
1873     struct wined3d_context *context;
1874
1875     TRACE("surface %p.\n", surface);
1876
1877     if (resource->pool == WINED3DPOOL_DEFAULT)
1878     {
1879         /* Default pool resources are supposed to be destroyed before Reset is called.
1880          * Implicit resources stay however. So this means we have an implicit render target
1881          * or depth stencil. The content may be destroyed, but we still have to tear down
1882          * opengl resources, so we cannot leave early.
1883          *
1884          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1885          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1886          * or the depth stencil into an FBO the texture or render buffer will be removed
1887          * and all flags get lost
1888          */
1889         surface_init_sysmem(surface);
1890         /* We also get here when the ddraw swapchain is destroyed, for example
1891          * for a mode switch. In this case this surface won't necessarily be
1892          * an implicit surface. We have to mark it lost so that the
1893          * application can restore it after the mode switch. */
1894         surface->flags |= SFLAG_LOST;
1895     }
1896     else
1897     {
1898         /* Load the surface into system memory */
1899         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1900         surface_modify_location(surface, surface->draw_binding, FALSE);
1901     }
1902     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1903     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1904     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1905
1906     context = context_acquire(device, NULL);
1907     gl_info = context->gl_info;
1908
1909     /* Destroy PBOs, but load them into real sysmem before */
1910     if (surface->flags & SFLAG_PBO)
1911         surface_remove_pbo(surface, gl_info);
1912
1913     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1914      * all application-created targets the application has to release the surface
1915      * before calling _Reset
1916      */
1917     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1918     {
1919         ENTER_GL();
1920         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1921         LEAVE_GL();
1922         list_remove(&entry->entry);
1923         HeapFree(GetProcessHeap(), 0, entry);
1924     }
1925     list_init(&surface->renderbuffers);
1926     surface->current_renderbuffer = NULL;
1927
1928     ENTER_GL();
1929
1930     /* If we're in a texture, the texture name belongs to the texture.
1931      * Otherwise, destroy it. */
1932     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1933     {
1934         glDeleteTextures(1, &surface->texture_name);
1935         surface->texture_name = 0;
1936         glDeleteTextures(1, &surface->texture_name_srgb);
1937         surface->texture_name_srgb = 0;
1938     }
1939     if (surface->rb_multisample)
1940     {
1941         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1942         surface->rb_multisample = 0;
1943     }
1944     if (surface->rb_resolved)
1945     {
1946         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1947         surface->rb_resolved = 0;
1948     }
1949
1950     LEAVE_GL();
1951
1952     context_release(context);
1953
1954     resource_unload(resource);
1955 }
1956
1957 static const struct wined3d_resource_ops surface_resource_ops =
1958 {
1959     surface_unload,
1960 };
1961
1962 static const struct wined3d_surface_ops surface_ops =
1963 {
1964     surface_private_setup,
1965     surface_realize_palette,
1966     surface_draw_overlay,
1967     surface_preload,
1968     surface_map,
1969     surface_unmap,
1970     surface_getdc,
1971 };
1972
1973 /*****************************************************************************
1974  * Initializes the GDI surface, aka creates the DIB section we render to
1975  * The DIB section creation is done by calling GetDC, which will create the
1976  * section and releasing the dc to allow the app to use it. The dib section
1977  * will stay until the surface is released
1978  *
1979  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1980  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1981  * avoid confusion in the shared surface code.
1982  *
1983  * Returns:
1984  *  WINED3D_OK on success
1985  *  The return values of called methods on failure
1986  *
1987  *****************************************************************************/
1988 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1989 {
1990     HRESULT hr;
1991
1992     TRACE("surface %p.\n", surface);
1993
1994     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1995     {
1996         ERR("Overlays not yet supported by GDI surfaces.\n");
1997         return WINED3DERR_INVALIDCALL;
1998     }
1999
2000     /* Sysmem textures have memory already allocated - release it,
2001      * this avoids an unnecessary memcpy. */
2002     hr = surface_create_dib_section(surface);
2003     if (SUCCEEDED(hr))
2004     {
2005         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
2006         surface->resource.heapMemory = NULL;
2007         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2008     }
2009
2010     /* We don't mind the nonpow2 stuff in GDI. */
2011     surface->pow2Width = surface->resource.width;
2012     surface->pow2Height = surface->resource.height;
2013
2014     return WINED3D_OK;
2015 }
2016
2017 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
2018 {
2019     struct wined3d_palette *palette = surface->palette;
2020
2021     TRACE("surface %p.\n", surface);
2022
2023     if (!palette) return;
2024
2025     if (surface->flags & SFLAG_DIBSECTION)
2026     {
2027         RGBQUAD col[256];
2028         unsigned int i;
2029
2030         TRACE("Updating the DC's palette.\n");
2031
2032         for (i = 0; i < 256; ++i)
2033         {
2034             col[i].rgbRed = palette->palents[i].peRed;
2035             col[i].rgbGreen = palette->palents[i].peGreen;
2036             col[i].rgbBlue = palette->palents[i].peBlue;
2037             col[i].rgbReserved = 0;
2038         }
2039         SetDIBColorTable(surface->hDC, 0, 256, col);
2040     }
2041
2042     /* Update the image because of the palette change. Some games like e.g.
2043      * Red Alert call SetEntries a lot to implement fading. */
2044     /* Tell the swapchain to update the screen. */
2045     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2046     {
2047         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2048         if (surface == swapchain->front_buffer)
2049         {
2050             x11_copy_to_screen(swapchain, NULL);
2051         }
2052     }
2053 }
2054
2055 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
2056 {
2057     FIXME("GDI surfaces can't draw overlays yet.\n");
2058     return E_FAIL;
2059 }
2060
/* Preload handler for GDI surfaces. Preloading is not supported here, so
 * apart from tracing the call this only reports an error. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);
    ERR("Preloading GDI surfaces is not supported.\n");
}
2067
2068 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2069 {
2070     TRACE("surface %p, rect %s, flags %#x.\n",
2071             surface, wine_dbgstr_rect(rect), flags);
2072
2073     if (!surface->resource.allocatedMemory)
2074     {
2075         /* This happens on gdi surfaces if the application set a user pointer
2076          * and resets it. Recreate the DIB section. */
2077         surface_create_dib_section(surface);
2078         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2079     }
2080 }
2081
2082 static void gdi_surface_unmap(struct wined3d_surface *surface)
2083 {
2084     TRACE("surface %p.\n", surface);
2085
2086     /* Tell the swapchain to update the screen. */
2087     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2088     {
2089         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2090         if (surface == swapchain->front_buffer)
2091         {
2092             x11_copy_to_screen(swapchain, &surface->lockedRect);
2093         }
2094     }
2095
2096     memset(&surface->lockedRect, 0, sizeof(RECT));
2097 }
2098
2099 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
2100 {
2101     WINED3DLOCKED_RECT lock;
2102     HRESULT hr;
2103
2104     TRACE("surface %p.\n", surface);
2105
2106     /* Should have a DIB section already. */
2107     if (!(surface->flags & SFLAG_DIBSECTION))
2108     {
2109         WARN("DC not supported on this surface\n");
2110         return WINED3DERR_INVALIDCALL;
2111     }
2112
2113     /* Map the surface. */
2114     hr = wined3d_surface_map(surface, &lock, NULL, 0);
2115     if (FAILED(hr))
2116         ERR("Map failed, hr %#x.\n", hr);
2117
2118     return hr;
2119 }
2120
2121 static const struct wined3d_surface_ops gdi_surface_ops =
2122 {
2123     gdi_surface_private_setup,
2124     gdi_surface_realize_palette,
2125     gdi_surface_draw_overlay,
2126     gdi_surface_preload,
2127     gdi_surface_map,
2128     gdi_surface_unmap,
2129     gdi_surface_getdc,
2130 };
2131
2132 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2133 {
2134     GLuint *name;
2135     DWORD flag;
2136
2137     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2138
2139     if(srgb)
2140     {
2141         name = &surface->texture_name_srgb;
2142         flag = SFLAG_INSRGBTEX;
2143     }
2144     else
2145     {
2146         name = &surface->texture_name;
2147         flag = SFLAG_INTEXTURE;
2148     }
2149
2150     if (!*name && new_name)
2151     {
2152         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2153          * surface has no texture name yet. See if we can get rid of this. */
2154         if (surface->flags & flag)
2155             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2156         surface_modify_location(surface, flag, FALSE);
2157     }
2158
2159     *name = new_name;
2160     surface_force_reload(surface);
2161 }
2162
2163 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2164 {
2165     TRACE("surface %p, target %#x.\n", surface, target);
2166
2167     if (surface->texture_target != target)
2168     {
2169         if (target == GL_TEXTURE_RECTANGLE_ARB)
2170         {
2171             surface->flags &= ~SFLAG_NORMCOORD;
2172         }
2173         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2174         {
2175             surface->flags |= SFLAG_NORMCOORD;
2176         }
2177     }
2178     surface->texture_target = target;
2179     surface_force_reload(surface);
2180 }
2181
2182 /* Context activation is done by the caller. */
2183 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2184 {
2185     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2186
2187     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2188     {
2189         struct wined3d_texture *texture = surface->container.u.texture;
2190
2191         TRACE("Passing to container (%p).\n", texture);
2192         texture->texture_ops->texture_bind(texture, context, srgb);
2193     }
2194     else
2195     {
2196         if (surface->texture_level)
2197         {
2198             ERR("Standalone surface %p is non-zero texture level %u.\n",
2199                     surface, surface->texture_level);
2200         }
2201
2202         if (srgb)
2203             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2204
2205         ENTER_GL();
2206
2207         if (!surface->texture_name)
2208         {
2209             glGenTextures(1, &surface->texture_name);
2210             checkGLcall("glGenTextures");
2211
2212             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2213
2214             context_bind_texture(context, surface->texture_target, surface->texture_name);
2215             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2216             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2217             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2218             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2219             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2220             checkGLcall("glTexParameteri");
2221         }
2222         else
2223         {
2224             context_bind_texture(context, surface->texture_target, surface->texture_name);
2225         }
2226
2227         LEAVE_GL();
2228     }
2229 }
2230
2231 /* This call just downloads data, the caller is responsible for binding the
2232  * correct texture. */
2233 /* Context activation is done by the caller. */
2234 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2235 {
2236     const struct wined3d_format *format = surface->resource.format;
2237
2238     /* Only support read back of converted P8 surfaces. */
2239     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2240     {
2241         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2242         return;
2243     }
2244
2245     ENTER_GL();
2246
2247     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2248     {
2249         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2250                 surface, surface->texture_level, format->glFormat, format->glType,
2251                 surface->resource.allocatedMemory);
2252
2253         if (surface->flags & SFLAG_PBO)
2254         {
2255             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2256             checkGLcall("glBindBufferARB");
2257             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2258             checkGLcall("glGetCompressedTexImageARB");
2259             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2260             checkGLcall("glBindBufferARB");
2261         }
2262         else
2263         {
2264             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2265                     surface->texture_level, surface->resource.allocatedMemory));
2266             checkGLcall("glGetCompressedTexImageARB");
2267         }
2268
2269         LEAVE_GL();
2270     }
2271     else
2272     {
2273         void *mem;
2274         GLenum gl_format = format->glFormat;
2275         GLenum gl_type = format->glType;
2276         int src_pitch = 0;
2277         int dst_pitch = 0;
2278
2279         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2280         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2281         {
2282             gl_format = GL_ALPHA;
2283             gl_type = GL_UNSIGNED_BYTE;
2284         }
2285
2286         if (surface->flags & SFLAG_NONPOW2)
2287         {
2288             unsigned char alignment = surface->resource.device->surface_alignment;
2289             src_pitch = format->byte_count * surface->pow2Width;
2290             dst_pitch = wined3d_surface_get_pitch(surface);
2291             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2292             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2293         }
2294         else
2295         {
2296             mem = surface->resource.allocatedMemory;
2297         }
2298
2299         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2300                 surface, surface->texture_level, gl_format, gl_type, mem);
2301
2302         if (surface->flags & SFLAG_PBO)
2303         {
2304             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2305             checkGLcall("glBindBufferARB");
2306
2307             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2308             checkGLcall("glGetTexImage");
2309
2310             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2311             checkGLcall("glBindBufferARB");
2312         }
2313         else
2314         {
2315             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2316             checkGLcall("glGetTexImage");
2317         }
2318         LEAVE_GL();
2319
2320         if (surface->flags & SFLAG_NONPOW2)
2321         {
2322             const BYTE *src_data;
2323             BYTE *dst_data;
2324             UINT y;
2325             /*
2326              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2327              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2328              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2329              *
2330              * We're doing this...
2331              *
2332              * instead of boxing the texture :
2333              * |<-texture width ->|  -->pow2width|   /\
2334              * |111111111111111111|              |   |
2335              * |222 Texture 222222| boxed empty  | texture height
2336              * |3333 Data 33333333|              |   |
2337              * |444444444444444444|              |   \/
2338              * -----------------------------------   |
2339              * |     boxed  empty | boxed empty  | pow2height
2340              * |                  |              |   \/
2341              * -----------------------------------
2342              *
2343              *
2344              * we're repacking the data to the expected texture width
2345              *
2346              * |<-texture width ->|  -->pow2width|   /\
2347              * |111111111111111111222222222222222|   |
2348              * |222333333333333333333444444444444| texture height
2349              * |444444                           |   |
2350              * |                                 |   \/
2351              * |                                 |   |
2352              * |            empty                | pow2height
2353              * |                                 |   \/
2354              * -----------------------------------
2355              *
2356              * == is the same as
2357              *
2358              * |<-texture width ->|    /\
2359              * |111111111111111111|
2360              * |222222222222222222|texture height
2361              * |333333333333333333|
2362              * |444444444444444444|    \/
2363              * --------------------
2364              *
2365              * this also means that any references to allocatedMemory should work with the data as if were a
2366              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2367              *
2368              * internally the texture is still stored in a boxed format so any references to textureName will
2369              * get a boxed texture with width pow2width and not a texture of width resource.width.
2370              *
2371              * Performance should not be an issue, because applications normally do not lock the surfaces when
2372              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2373              * and doesn't have to be re-read. */
2374             src_data = mem;
2375             dst_data = surface->resource.allocatedMemory;
2376             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2377             for (y = 1; y < surface->resource.height; ++y)
2378             {
2379                 /* skip the first row */
2380                 src_data += src_pitch;
2381                 dst_data += dst_pitch;
2382                 memcpy(dst_data, src_data, dst_pitch);
2383             }
2384
2385             HeapFree(GetProcessHeap(), 0, mem);
2386         }
2387     }
2388
2389     /* Surface has now been downloaded */
2390     surface->flags |= SFLAG_INSYSMEM;
2391 }
2392
2393 /* This call just uploads data, the caller is responsible for binding the
2394  * correct texture. */
2395 /* Context activation is done by the caller. */
2396 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2397         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2398         BOOL srgb, const struct wined3d_bo_address *data)
2399 {
2400     UINT update_w = src_rect->right - src_rect->left;
2401     UINT update_h = src_rect->bottom - src_rect->top;
2402
2403     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2404             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2405             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2406
2407     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2408         update_h *= format->heightscale;
2409
2410     ENTER_GL();
2411
2412     if (data->buffer_object)
2413     {
2414         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2415         checkGLcall("glBindBufferARB");
2416     }
2417
2418     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2419     {
2420         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2421         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2422         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2423         const BYTE *addr = data->addr;
2424         GLenum internal;
2425
2426         addr += (src_rect->top / format->block_height) * src_pitch;
2427         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2428
2429         if (srgb)
2430             internal = format->glGammaInternal;
2431         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2432             internal = format->rtInternal;
2433         else
2434             internal = format->glInternal;
2435
2436         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2437                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2438                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2439
2440         if (row_length == src_pitch)
2441         {
2442             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2443                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2444         }
2445         else
2446         {
2447             UINT row, y;
2448
2449             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2450              * can't use the unpack row length like below. */
2451             for (row = 0, y = dst_point->y; row < row_count; ++row)
2452             {
2453                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2454                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2455                 y += format->block_height;
2456                 addr += src_pitch;
2457             }
2458         }
2459         checkGLcall("glCompressedTexSubImage2DARB");
2460     }
2461     else
2462     {
2463         const BYTE *addr = data->addr;
2464
2465         addr += src_rect->top * src_w * format->byte_count;
2466         addr += src_rect->left * format->byte_count;
2467
2468         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2469                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2470                 update_w, update_h, format->glFormat, format->glType, addr);
2471
2472         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2473         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2474                 update_w, update_h, format->glFormat, format->glType, addr);
2475         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2476         checkGLcall("glTexSubImage2D");
2477     }
2478
2479     if (data->buffer_object)
2480     {
2481         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2482         checkGLcall("glBindBufferARB");
2483     }
2484
2485     LEAVE_GL();
2486
2487     if (wined3d_settings.strict_draw_ordering)
2488         wglFlush();
2489
2490     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2491     {
2492         struct wined3d_device *device = surface->resource.device;
2493         unsigned int i;
2494
2495         for (i = 0; i < device->context_count; ++i)
2496         {
2497             context_surface_update(device->contexts[i], surface);
2498         }
2499     }
2500 }
2501
2502 /* This call just allocates the texture, the caller is responsible for binding
2503  * the correct texture. */
2504 /* Context activation is done by the caller. */
2505 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2506         const struct wined3d_format *format, BOOL srgb)
2507 {
2508     BOOL enable_client_storage = FALSE;
2509     GLsizei width = surface->pow2Width;
2510     GLsizei height = surface->pow2Height;
2511     const BYTE *mem = NULL;
2512     GLenum internal;
2513
2514     if (srgb)
2515     {
2516         internal = format->glGammaInternal;
2517     }
2518     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2519     {
2520         internal = format->rtInternal;
2521     }
2522     else
2523     {
2524         internal = format->glInternal;
2525     }
2526
2527     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2528
2529     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2530             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2531             internal, width, height, format->glFormat, format->glType);
2532
2533     ENTER_GL();
2534
2535     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2536     {
2537         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2538                 || !surface->resource.allocatedMemory)
2539         {
2540             /* In some cases we want to disable client storage.
2541              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2542              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2543              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2544              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2545              */
2546             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2547             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2548             surface->flags &= ~SFLAG_CLIENT;
2549             enable_client_storage = TRUE;
2550         }
2551         else
2552         {
2553             surface->flags |= SFLAG_CLIENT;
2554
2555             /* Point OpenGL to our allocated texture memory. Do not use
2556              * resource.allocatedMemory here because it might point into a
2557              * PBO. Instead use heapMemory, but get the alignment right. */
2558             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2559                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2560         }
2561     }
2562
2563     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2564     {
2565         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2566                 internal, width, height, 0, surface->resource.size, mem));
2567         checkGLcall("glCompressedTexImage2DARB");
2568     }
2569     else
2570     {
2571         glTexImage2D(surface->texture_target, surface->texture_level,
2572                 internal, width, height, 0, format->glFormat, format->glType, mem);
2573         checkGLcall("glTexImage2D");
2574     }
2575
2576     if(enable_client_storage) {
2577         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2578         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2579     }
2580     LEAVE_GL();
2581 }
2582
2583 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2584  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2585 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2586 /* GL locking is done by the caller */
2587 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2588 {
2589     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2590     struct wined3d_renderbuffer_entry *entry;
2591     GLuint renderbuffer = 0;
2592     unsigned int src_width, src_height;
2593     unsigned int width, height;
2594
2595     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2596     {
2597         width = rt->pow2Width;
2598         height = rt->pow2Height;
2599     }
2600     else
2601     {
2602         width = surface->pow2Width;
2603         height = surface->pow2Height;
2604     }
2605
2606     src_width = surface->pow2Width;
2607     src_height = surface->pow2Height;
2608
2609     /* A depth stencil smaller than the render target is not valid */
2610     if (width > src_width || height > src_height) return;
2611
2612     /* Remove any renderbuffer set if the sizes match */
2613     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2614             || (width == src_width && height == src_height))
2615     {
2616         surface->current_renderbuffer = NULL;
2617         return;
2618     }
2619
2620     /* Look if we've already got a renderbuffer of the correct dimensions */
2621     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2622     {
2623         if (entry->width == width && entry->height == height)
2624         {
2625             renderbuffer = entry->id;
2626             surface->current_renderbuffer = entry;
2627             break;
2628         }
2629     }
2630
2631     if (!renderbuffer)
2632     {
2633         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2634         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2635         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2636                 surface->resource.format->glInternal, width, height);
2637
2638         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2639         entry->width = width;
2640         entry->height = height;
2641         entry->id = renderbuffer;
2642         list_add_head(&surface->renderbuffers, &entry->entry);
2643
2644         surface->current_renderbuffer = entry;
2645     }
2646
2647     checkGLcall("set_compatible_renderbuffer");
2648 }
2649
2650 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2651 {
2652     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2653
2654     TRACE("surface %p.\n", surface);
2655
2656     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2657     {
2658         ERR("Surface %p is not on a swapchain.\n", surface);
2659         return GL_NONE;
2660     }
2661
2662     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2663     {
2664         if (swapchain->render_to_fbo)
2665         {
2666             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2667             return GL_COLOR_ATTACHMENT0;
2668         }
2669         TRACE("Returning GL_BACK\n");
2670         return GL_BACK;
2671     }
2672     else if (surface == swapchain->front_buffer)
2673     {
2674         TRACE("Returning GL_FRONT\n");
2675         return GL_FRONT;
2676     }
2677
2678     FIXME("Higher back buffer, returning GL_BACK\n");
2679     return GL_BACK;
2680 }
2681
2682 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2683 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2684 {
2685     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2686
2687     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2688         /* No partial locking for textures yet. */
2689         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2690
2691     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2692     if (dirty_rect)
2693     {
2694         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2695         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2696         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2697         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2698     }
2699     else
2700     {
2701         surface->dirtyRect.left = 0;
2702         surface->dirtyRect.top = 0;
2703         surface->dirtyRect.right = surface->resource.width;
2704         surface->dirtyRect.bottom = surface->resource.height;
2705     }
2706
2707     /* if the container is a texture then mark it dirty. */
2708     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2709     {
2710         TRACE("Passing to container.\n");
2711         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2712     }
2713 }
2714
2715 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2716 {
2717     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2718     BOOL ck_changed;
2719
2720     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2721
2722     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2723     {
2724         ERR("Not supported on scratch surfaces.\n");
2725         return WINED3DERR_INVALIDCALL;
2726     }
2727
2728     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2729
2730     /* Reload if either the texture and sysmem have different ideas about the
2731      * color key, or the actual key values changed. */
2732     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2733             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2734             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2735     {
2736         TRACE("Reloading because of color keying\n");
2737         /* To perform the color key conversion we need a sysmem copy of
2738          * the surface. Make sure we have it. */
2739
2740         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2741         /* Make sure the texture is reloaded because of the color key change,
2742          * this kills performance though :( */
2743         /* TODO: This is not necessarily needed with hw palettized texture support. */
2744         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2745         /* Switching color keying on / off may change the internal format. */
2746         if (ck_changed)
2747             surface_force_reload(surface);
2748     }
2749     else if (!(surface->flags & flag))
2750     {
2751         TRACE("Reloading because surface is dirty.\n");
2752     }
2753     else
2754     {
2755         TRACE("surface is already in texture\n");
2756         return WINED3D_OK;
2757     }
2758
2759     /* No partial locking for textures yet. */
2760     surface_load_location(surface, flag, NULL);
2761     surface_evict_sysmem(surface);
2762
2763     return WINED3D_OK;
2764 }
2765
/* Convert a 32 bit float to the bit pattern of a 16 bit float (1 sign bit,
 * 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero is returned as 0x0000, NaN as 0x7c01 and infinities as 0x7c00 /
 * 0xfc00. Values too large for a 16 bit float become infinity, values too
 * small are flushed towards zero through the denormal range. Normal values
 * are rounded to nearest, with ties away from zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    int exponent = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the magnitude into [2^10, 2^11), so that its integer part is the
     * 11 bit mantissa including the implicit leading one. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        --exponent;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        ++exponent;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up from 2047 carries into a 12th bit. Renormalize, otherwise
     * masking with 0x3ff below drops the carry and halves the result. */
    if (mantissa == 0x800)
    {
        mantissa >>= 1;
        ++exponent;
    }

    exponent += 10;  /* Normalize the mantissa. */
    exponent += 15;  /* Exponent is encoded with excess 15. */

    if (exponent > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exponent <= 0)
    {
        /* exponent == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exponent <= 0)
        {
            mantissa = mantissa >> 1;
            ++exponent;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exponent << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2828
2829 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2830 {
2831     ULONG refcount;
2832
2833     TRACE("Surface %p, container %p of type %#x.\n",
2834             surface, surface->container.u.base, surface->container.type);
2835
2836     switch (surface->container.type)
2837     {
2838         case WINED3D_CONTAINER_TEXTURE:
2839             return wined3d_texture_incref(surface->container.u.texture);
2840
2841         case WINED3D_CONTAINER_SWAPCHAIN:
2842             return wined3d_swapchain_incref(surface->container.u.swapchain);
2843
2844         default:
2845             ERR("Unhandled container type %#x.\n", surface->container.type);
2846         case WINED3D_CONTAINER_NONE:
2847             break;
2848     }
2849
2850     refcount = InterlockedIncrement(&surface->resource.ref);
2851     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2852
2853     return refcount;
2854 }
2855
2856 /* Do not call while under the GL lock. */
2857 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2858 {
2859     ULONG refcount;
2860
2861     TRACE("Surface %p, container %p of type %#x.\n",
2862             surface, surface->container.u.base, surface->container.type);
2863
2864     switch (surface->container.type)
2865     {
2866         case WINED3D_CONTAINER_TEXTURE:
2867             return wined3d_texture_decref(surface->container.u.texture);
2868
2869         case WINED3D_CONTAINER_SWAPCHAIN:
2870             return wined3d_swapchain_decref(surface->container.u.swapchain);
2871
2872         default:
2873             ERR("Unhandled container type %#x.\n", surface->container.type);
2874         case WINED3D_CONTAINER_NONE:
2875             break;
2876     }
2877
2878     refcount = InterlockedDecrement(&surface->resource.ref);
2879     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2880
2881     if (!refcount)
2882     {
2883         surface_cleanup(surface);
2884         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2885
2886         TRACE("Destroyed surface %p.\n", surface);
2887         HeapFree(GetProcessHeap(), 0, surface);
2888     }
2889
2890     return refcount;
2891 }
2892
2893 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2894 {
2895     return resource_set_priority(&surface->resource, priority);
2896 }
2897
2898 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2899 {
2900     return resource_get_priority(&surface->resource);
2901 }
2902
2903 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2904 {
2905     TRACE("surface %p.\n", surface);
2906
2907     surface->surface_ops->surface_preload(surface);
2908 }
2909
2910 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2911 {
2912     TRACE("surface %p.\n", surface);
2913
2914     return surface->resource.parent;
2915 }
2916
2917 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2918 {
2919     TRACE("surface %p.\n", surface);
2920
2921     return &surface->resource;
2922 }
2923
2924 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2925 {
2926     TRACE("surface %p, flags %#x.\n", surface, flags);
2927
2928     switch (flags)
2929     {
2930         case WINEDDGBS_CANBLT:
2931         case WINEDDGBS_ISBLTDONE:
2932             return WINED3D_OK;
2933
2934         default:
2935             return WINED3DERR_INVALIDCALL;
2936     }
2937 }
2938
2939 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2940 {
2941     TRACE("surface %p, flags %#x.\n", surface, flags);
2942
2943     /* XXX: DDERR_INVALIDSURFACETYPE */
2944
2945     switch (flags)
2946     {
2947         case WINEDDGFS_CANFLIP:
2948         case WINEDDGFS_ISFLIPDONE:
2949             return WINED3D_OK;
2950
2951         default:
2952             return WINED3DERR_INVALIDCALL;
2953     }
2954 }
2955
2956 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2957 {
2958     TRACE("surface %p.\n", surface);
2959
2960     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2961     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2962 }
2963
2964 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2965 {
2966     TRACE("surface %p.\n", surface);
2967
2968     surface->flags &= ~SFLAG_LOST;
2969     return WINED3D_OK;
2970 }
2971
2972 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2973 {
2974     TRACE("surface %p, palette %p.\n", surface, palette);
2975
2976     if (surface->palette == palette)
2977     {
2978         TRACE("Nop palette change.\n");
2979         return WINED3D_OK;
2980     }
2981
2982     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2983         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2984
2985     surface->palette = palette;
2986
2987     if (palette)
2988     {
2989         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2990             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2991
2992         surface->surface_ops->surface_realize_palette(surface);
2993     }
2994
2995     return WINED3D_OK;
2996 }
2997
2998 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2999         DWORD flags, const WINEDDCOLORKEY *color_key)
3000 {
3001     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3002
3003     if (flags & WINEDDCKEY_COLORSPACE)
3004     {
3005         FIXME(" colorkey value not supported (%08x) !\n", flags);
3006         return WINED3DERR_INVALIDCALL;
3007     }
3008
3009     /* Dirtify the surface, but only if a key was changed. */
3010     if (color_key)
3011     {
3012         switch (flags & ~WINEDDCKEY_COLORSPACE)
3013         {
3014             case WINEDDCKEY_DESTBLT:
3015                 surface->DestBltCKey = *color_key;
3016                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3017                 break;
3018
3019             case WINEDDCKEY_DESTOVERLAY:
3020                 surface->DestOverlayCKey = *color_key;
3021                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3022                 break;
3023
3024             case WINEDDCKEY_SRCOVERLAY:
3025                 surface->SrcOverlayCKey = *color_key;
3026                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3027                 break;
3028
3029             case WINEDDCKEY_SRCBLT:
3030                 surface->SrcBltCKey = *color_key;
3031                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3032                 break;
3033         }
3034     }
3035     else
3036     {
3037         switch (flags & ~WINEDDCKEY_COLORSPACE)
3038         {
3039             case WINEDDCKEY_DESTBLT:
3040                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3041                 break;
3042
3043             case WINEDDCKEY_DESTOVERLAY:
3044                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3045                 break;
3046
3047             case WINEDDCKEY_SRCOVERLAY:
3048                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3049                 break;
3050
3051             case WINEDDCKEY_SRCBLT:
3052                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3053                 break;
3054         }
3055     }
3056
3057     return WINED3D_OK;
3058 }
3059
3060 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3061 {
3062     TRACE("surface %p.\n", surface);
3063
3064     return surface->palette;
3065 }
3066
3067 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3068 {
3069     const struct wined3d_format *format = surface->resource.format;
3070     DWORD pitch;
3071
3072     TRACE("surface %p.\n", surface);
3073
3074     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3075     {
3076         /* Since compressed formats are block based, pitch means the amount of
3077          * bytes to the next row of block rather than the next row of pixels. */
3078         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3079         pitch = row_block_count * format->block_byte_count;
3080     }
3081     else
3082     {
3083         unsigned char alignment = surface->resource.device->surface_alignment;
3084         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3085         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3086     }
3087
3088     TRACE("Returning %u.\n", pitch);
3089
3090     return pitch;
3091 }
3092
3093 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3094 {
3095     TRACE("surface %p, mem %p.\n", surface, mem);
3096
3097     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3098     {
3099         WARN("Surface is locked or the DC is in use.\n");
3100         return WINED3DERR_INVALIDCALL;
3101     }
3102
3103     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3104     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3105     {
3106         ERR("Not supported on render targets.\n");
3107         return WINED3DERR_INVALIDCALL;
3108     }
3109
3110     if (mem && mem != surface->resource.allocatedMemory)
3111     {
3112         void *release = NULL;
3113
3114         /* Do I have to copy the old surface content? */
3115         if (surface->flags & SFLAG_DIBSECTION)
3116         {
3117             SelectObject(surface->hDC, surface->dib.holdbitmap);
3118             DeleteDC(surface->hDC);
3119             /* Release the DIB section. */
3120             DeleteObject(surface->dib.DIBsection);
3121             surface->dib.bitmap_data = NULL;
3122             surface->resource.allocatedMemory = NULL;
3123             surface->hDC = NULL;
3124             surface->flags &= ~SFLAG_DIBSECTION;
3125         }
3126         else if (!(surface->flags & SFLAG_USERPTR))
3127         {
3128             release = surface->resource.heapMemory;
3129             surface->resource.heapMemory = NULL;
3130         }
3131         surface->resource.allocatedMemory = mem;
3132         surface->flags |= SFLAG_USERPTR;
3133
3134         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3135         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3136
3137         /* For client textures OpenGL has to be notified. */
3138         if (surface->flags & SFLAG_CLIENT)
3139             surface_release_client_storage(surface);
3140
3141         /* Now free the old memory if any. */
3142         HeapFree(GetProcessHeap(), 0, release);
3143     }
3144     else if (surface->flags & SFLAG_USERPTR)
3145     {
3146         /* HeapMemory should be NULL already. */
3147         if (surface->resource.heapMemory)
3148             ERR("User pointer surface has heap memory allocated.\n");
3149
3150         if (!mem)
3151         {
3152             surface->resource.allocatedMemory = NULL;
3153             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3154
3155             if (surface->flags & SFLAG_CLIENT)
3156                 surface_release_client_storage(surface);
3157
3158             surface_prepare_system_memory(surface);
3159         }
3160
3161         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3162     }
3163
3164     return WINED3D_OK;
3165 }
3166
3167 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3168 {
3169     LONG w, h;
3170
3171     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3172
3173     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3174     {
3175         WARN("Not an overlay surface.\n");
3176         return WINEDDERR_NOTAOVERLAYSURFACE;
3177     }
3178
3179     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3180     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3181     surface->overlay_destrect.left = x;
3182     surface->overlay_destrect.top = y;
3183     surface->overlay_destrect.right = x + w;
3184     surface->overlay_destrect.bottom = y + h;
3185
3186     surface->surface_ops->surface_draw_overlay(surface);
3187
3188     return WINED3D_OK;
3189 }
3190
3191 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3192 {
3193     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3194
3195     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3196     {
3197         TRACE("Not an overlay surface.\n");
3198         return WINEDDERR_NOTAOVERLAYSURFACE;
3199     }
3200
3201     if (!surface->overlay_dest)
3202     {
3203         TRACE("Overlay not visible.\n");
3204         *x = 0;
3205         *y = 0;
3206         return WINEDDERR_OVERLAYNOTVISIBLE;
3207     }
3208
3209     *x = surface->overlay_destrect.left;
3210     *y = surface->overlay_destrect.top;
3211
3212     TRACE("Returning position %d, %d.\n", *x, *y);
3213
3214     return WINED3D_OK;
3215 }
3216
3217 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3218         DWORD flags, struct wined3d_surface *ref)
3219 {
3220     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3221
3222     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3223     {
3224         TRACE("Not an overlay surface.\n");
3225         return WINEDDERR_NOTAOVERLAYSURFACE;
3226     }
3227
3228     return WINED3D_OK;
3229 }
3230
3231 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3232         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3233 {
3234     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3235             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3236
3237     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3238     {
3239         WARN("Not an overlay surface.\n");
3240         return WINEDDERR_NOTAOVERLAYSURFACE;
3241     }
3242     else if (!dst_surface)
3243     {
3244         WARN("Dest surface is NULL.\n");
3245         return WINED3DERR_INVALIDCALL;
3246     }
3247
3248     if (src_rect)
3249     {
3250         surface->overlay_srcrect = *src_rect;
3251     }
3252     else
3253     {
3254         surface->overlay_srcrect.left = 0;
3255         surface->overlay_srcrect.top = 0;
3256         surface->overlay_srcrect.right = surface->resource.width;
3257         surface->overlay_srcrect.bottom = surface->resource.height;
3258     }
3259
3260     if (dst_rect)
3261     {
3262         surface->overlay_destrect = *dst_rect;
3263     }
3264     else
3265     {
3266         surface->overlay_destrect.left = 0;
3267         surface->overlay_destrect.top = 0;
3268         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3269         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3270     }
3271
3272     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3273     {
3274         surface->overlay_dest = NULL;
3275         list_remove(&surface->overlay_entry);
3276     }
3277
3278     if (flags & WINEDDOVER_SHOW)
3279     {
3280         if (surface->overlay_dest != dst_surface)
3281         {
3282             surface->overlay_dest = dst_surface;
3283             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3284         }
3285     }
3286     else if (flags & WINEDDOVER_HIDE)
3287     {
3288         /* tests show that the rectangles are erased on hide */
3289         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3290         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3291         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3292         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3293         surface->overlay_dest = NULL;
3294     }
3295
3296     surface->surface_ops->surface_draw_overlay(surface);
3297
3298     return WINED3D_OK;
3299 }
3300
3301 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3302 {
3303     TRACE("surface %p, clipper %p.\n", surface, clipper);
3304
3305     surface->clipper = clipper;
3306
3307     return WINED3D_OK;
3308 }
3309
3310 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3311 {
3312     TRACE("surface %p.\n", surface);
3313
3314     return surface->clipper;
3315 }
3316
3317 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3318 {
3319     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3320
3321     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3322
3323     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3324     {
3325         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3326         return WINED3DERR_INVALIDCALL;
3327     }
3328
3329     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3330             surface->pow2Width, surface->pow2Height);
3331     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3332     surface->resource.format = format;
3333
3334     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3335     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3336             format->glFormat, format->glInternal, format->glType);
3337
3338     return WINED3D_OK;
3339 }
3340
3341 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3342         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3343 {
3344     unsigned short *dst_s;
3345     const float *src_f;
3346     unsigned int x, y;
3347
3348     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3349
3350     for (y = 0; y < h; ++y)
3351     {
3352         src_f = (const float *)(src + y * pitch_in);
3353         dst_s = (unsigned short *) (dst + y * pitch_out);
3354         for (x = 0; x < w; ++x)
3355         {
3356             dst_s[x] = float_32_to_16(src_f + x);
3357         }
3358     }
3359 }
3360
3361 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3362         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3363 {
3364     static const unsigned char convert_5to8[] =
3365     {
3366         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3367         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3368         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3369         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3370     };
3371     static const unsigned char convert_6to8[] =
3372     {
3373         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3374         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3375         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3376         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3377         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3378         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3379         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3380         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3381     };
3382     unsigned int x, y;
3383
3384     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3385
3386     for (y = 0; y < h; ++y)
3387     {
3388         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3389         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3390         for (x = 0; x < w; ++x)
3391         {
3392             WORD pixel = src_line[x];
3393             dst_line[x] = 0xff000000
3394                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3395                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3396                     | convert_5to8[(pixel & 0x001f)];
3397         }
3398     }
3399 }
3400
3401 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3402  * in both cases we're just setting the X / Alpha channel to 0xff. */
3403 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3404         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3405 {
3406     unsigned int x, y;
3407
3408     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3409
3410     for (y = 0; y < h; ++y)
3411     {
3412         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3413         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3414
3415         for (x = 0; x < w; ++x)
3416         {
3417             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3418         }
3419     }
3420 }
3421
3422 static inline BYTE cliptobyte(int x)
3423 {
3424     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3425 }
3426
3427 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3428         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3429 {
3430     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3431     unsigned int x, y;
3432
3433     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3434
3435     for (y = 0; y < h; ++y)
3436     {
3437         const BYTE *src_line = src + y * pitch_in;
3438         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3439         for (x = 0; x < w; ++x)
3440         {
3441             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3442              *     C = Y - 16; D = U - 128; E = V - 128;
3443              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3444              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3445              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3446              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3447              * U and V are shared between the pixels. */
3448             if (!(x & 1)) /* For every even pixel, read new U and V. */
3449             {
3450                 d = (int) src_line[1] - 128;
3451                 e = (int) src_line[3] - 128;
3452                 r2 = 409 * e + 128;
3453                 g2 = - 100 * d - 208 * e + 128;
3454                 b2 = 516 * d + 128;
3455             }
3456             c2 = 298 * ((int) src_line[0] - 16);
3457             dst_line[x] = 0xff000000
3458                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3459                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3460                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3461                 /* Scale RGB values to 0..255 range,
3462                  * then clip them if still not in range (may be negative),
3463                  * then shift them within DWORD if necessary. */
3464             src_line += 2;
3465         }
3466     }
3467 }
3468
3469 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3470         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3471 {
3472     unsigned int x, y;
3473     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3474
3475     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3476
3477     for (y = 0; y < h; ++y)
3478     {
3479         const BYTE *src_line = src + y * pitch_in;
3480         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3481         for (x = 0; x < w; ++x)
3482         {
3483             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3484              *     C = Y - 16; D = U - 128; E = V - 128;
3485              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3486              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3487              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3488              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3489              * U and V are shared between the pixels. */
3490             if (!(x & 1)) /* For every even pixel, read new U and V. */
3491             {
3492                 d = (int) src_line[1] - 128;
3493                 e = (int) src_line[3] - 128;
3494                 r2 = 409 * e + 128;
3495                 g2 = - 100 * d - 208 * e + 128;
3496                 b2 = 516 * d + 128;
3497             }
3498             c2 = 298 * ((int) src_line[0] - 16);
3499             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3500                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3501                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3502                 /* Scale RGB values to 0..255 range,
3503                  * then clip them if still not in range (may be negative),
3504                  * then shift them within DWORD if necessary. */
3505             src_line += 2;
3506         }
3507     }
3508 }
3509
3510 struct d3dfmt_convertor_desc
3511 {
3512     enum wined3d_format_id from, to;
3513     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3514 };
3515
3516 static const struct d3dfmt_convertor_desc convertors[] =
3517 {
3518     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3519     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3520     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3521     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3522     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3523     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3524 };
3525
3526 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3527         enum wined3d_format_id to)
3528 {
3529     unsigned int i;
3530
3531     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3532     {
3533         if (convertors[i].from == from && convertors[i].to == to)
3534             return &convertors[i];
3535     }
3536
3537     return NULL;
3538 }
3539
3540 /*****************************************************************************
3541  * surface_convert_format
3542  *
3543  * Creates a duplicate of a surface in a different format. Is used by Blt to
3544  * blit between surfaces with different formats.
3545  *
3546  * Parameters
3547  *  source: Source surface
3548  *  fmt: Requested destination format
3549  *
3550  *****************************************************************************/
3551 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3552 {
3553     const struct d3dfmt_convertor_desc *conv;
3554     WINED3DLOCKED_RECT lock_src, lock_dst;
3555     struct wined3d_surface *ret = NULL;
3556     HRESULT hr;
3557
3558     conv = find_convertor(source->resource.format->id, to_fmt);
3559     if (!conv)
3560     {
3561         FIXME("Cannot find a conversion function from format %s to %s.\n",
3562                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3563         return NULL;
3564     }
3565
3566     wined3d_surface_create(source->resource.device, source->resource.width,
3567             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3568             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3569             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3570     if (!ret)
3571     {
3572         ERR("Failed to create a destination surface for conversion.\n");
3573         return NULL;
3574     }
3575
3576     memset(&lock_src, 0, sizeof(lock_src));
3577     memset(&lock_dst, 0, sizeof(lock_dst));
3578
3579     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3580     if (FAILED(hr))
3581     {
3582         ERR("Failed to lock the source surface.\n");
3583         wined3d_surface_decref(ret);
3584         return NULL;
3585     }
3586     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3587     if (FAILED(hr))
3588     {
3589         ERR("Failed to lock the destination surface.\n");
3590         wined3d_surface_unmap(source);
3591         wined3d_surface_decref(ret);
3592         return NULL;
3593     }
3594
3595     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3596             source->resource.width, source->resource.height);
3597
3598     wined3d_surface_unmap(ret);
3599     wined3d_surface_unmap(source);
3600
3601     return ret;
3602 }
3603
3604 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3605         unsigned int bpp, UINT pitch, DWORD color)
3606 {
3607     BYTE *first;
3608     int x, y;
3609
3610     /* Do first row */
3611
3612 #define COLORFILL_ROW(type) \
3613 do { \
3614     type *d = (type *)buf; \
3615     for (x = 0; x < width; ++x) \
3616         d[x] = (type)color; \
3617 } while(0)
3618
3619     switch (bpp)
3620     {
3621         case 1:
3622             COLORFILL_ROW(BYTE);
3623             break;
3624
3625         case 2:
3626             COLORFILL_ROW(WORD);
3627             break;
3628
3629         case 3:
3630         {
3631             BYTE *d = buf;
3632             for (x = 0; x < width; ++x, d += 3)
3633             {
3634                 d[0] = (color      ) & 0xFF;
3635                 d[1] = (color >>  8) & 0xFF;
3636                 d[2] = (color >> 16) & 0xFF;
3637             }
3638             break;
3639         }
3640         case 4:
3641             COLORFILL_ROW(DWORD);
3642             break;
3643
3644         default:
3645             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3646             return WINED3DERR_NOTAVAILABLE;
3647     }
3648
3649 #undef COLORFILL_ROW
3650
3651     /* Now copy first row. */
3652     first = buf;
3653     for (y = 1; y < height; ++y)
3654     {
3655         buf += pitch;
3656         memcpy(buf, first, width * bpp);
3657     }
3658
3659     return WINED3D_OK;
3660 }
3661
3662 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3663 {
3664     TRACE("surface %p.\n", surface);
3665
3666     if (!(surface->flags & SFLAG_LOCKED))
3667     {
3668         WARN("Trying to unmap unmapped surface.\n");
3669         return WINEDDERR_NOTLOCKED;
3670     }
3671     surface->flags &= ~SFLAG_LOCKED;
3672
3673     surface->surface_ops->surface_unmap(surface);
3674
3675     return WINED3D_OK;
3676 }
3677
3678 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3679         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3680 {
3681     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3682             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3683
3684     if (surface->flags & SFLAG_LOCKED)
3685     {
3686         WARN("Surface is already mapped.\n");
3687         return WINED3DERR_INVALIDCALL;
3688     }
3689     surface->flags |= SFLAG_LOCKED;
3690
3691     if (!(surface->flags & SFLAG_LOCKABLE))
3692         WARN("Trying to lock unlockable surface.\n");
3693
3694     surface->surface_ops->surface_map(surface, rect, flags);
3695
3696     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3697
3698     if (!rect)
3699     {
3700         locked_rect->pBits = surface->resource.allocatedMemory;
3701         surface->lockedRect.left = 0;
3702         surface->lockedRect.top = 0;
3703         surface->lockedRect.right = surface->resource.width;
3704         surface->lockedRect.bottom = surface->resource.height;
3705     }
3706     else
3707     {
3708         const struct wined3d_format *format = surface->resource.format;
3709
3710         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3711         {
3712             /* Compressed textures are block based, so calculate the offset of
3713              * the block that contains the top-left pixel of the locked rectangle. */
3714             locked_rect->pBits = surface->resource.allocatedMemory
3715                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3716                     + ((rect->left / format->block_width) * format->block_byte_count);
3717         }
3718         else
3719         {
3720             locked_rect->pBits = surface->resource.allocatedMemory
3721                     + (locked_rect->Pitch * rect->top)
3722                     + (rect->left * format->byte_count);
3723         }
3724         surface->lockedRect.left = rect->left;
3725         surface->lockedRect.top = rect->top;
3726         surface->lockedRect.right = rect->right;
3727         surface->lockedRect.bottom = rect->bottom;
3728     }
3729
3730     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3731     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3732
3733     return WINED3D_OK;
3734 }
3735
3736 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3737 {
3738     HRESULT hr;
3739
3740     TRACE("surface %p, dc %p.\n", surface, dc);
3741
3742     if (surface->flags & SFLAG_USERPTR)
3743     {
3744         ERR("Not supported on surfaces with application-provided memory.\n");
3745         return WINEDDERR_NODC;
3746     }
3747
3748     /* Give more detailed info for ddraw. */
3749     if (surface->flags & SFLAG_DCINUSE)
3750         return WINEDDERR_DCALREADYCREATED;
3751
3752     /* Can't GetDC if the surface is locked. */
3753     if (surface->flags & SFLAG_LOCKED)
3754         return WINED3DERR_INVALIDCALL;
3755
3756     hr = surface->surface_ops->surface_getdc(surface);
3757     if (FAILED(hr))
3758         return hr;
3759
3760     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3761             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3762     {
3763         /* GetDC on palettized formats is unsupported in D3D9, and the method
3764          * is missing in D3D8, so this should only be used for DX <=7
3765          * surfaces (with non-device palettes). */
3766         const PALETTEENTRY *pal = NULL;
3767
3768         if (surface->palette)
3769         {
3770             pal = surface->palette->palents;
3771         }
3772         else
3773         {
3774             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3775             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3776
3777             if (dds_primary && dds_primary->palette)
3778                 pal = dds_primary->palette->palents;
3779         }
3780
3781         if (pal)
3782         {
3783             RGBQUAD col[256];
3784             unsigned int i;
3785
3786             for (i = 0; i < 256; ++i)
3787             {
3788                 col[i].rgbRed = pal[i].peRed;
3789                 col[i].rgbGreen = pal[i].peGreen;
3790                 col[i].rgbBlue = pal[i].peBlue;
3791                 col[i].rgbReserved = 0;
3792             }
3793             SetDIBColorTable(surface->hDC, 0, 256, col);
3794         }
3795     }
3796
3797     surface->flags |= SFLAG_DCINUSE;
3798
3799     *dc = surface->hDC;
3800     TRACE("Returning dc %p.\n", *dc);
3801
3802     return WINED3D_OK;
3803 }
3804
3805 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3806 {
3807     TRACE("surface %p, dc %p.\n", surface, dc);
3808
3809     if (!(surface->flags & SFLAG_DCINUSE))
3810         return WINEDDERR_NODC;
3811
3812     if (surface->hDC != dc)
3813     {
3814         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3815                 dc, surface->hDC);
3816         return WINEDDERR_NODC;
3817     }
3818
3819     /* Copy the contents of the DIB over to the PBO. */
3820     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3821         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3822
3823     /* We locked first, so unlock now. */
3824     wined3d_surface_unmap(surface);
3825
3826     surface->flags &= ~SFLAG_DCINUSE;
3827
3828     return WINED3D_OK;
3829 }
3830
3831 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3832 {
3833     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3834
3835     if (flags)
3836     {
3837         static UINT once;
3838         if (!once++)
3839             FIXME("Ignoring flags %#x.\n", flags);
3840         else
3841             WARN("Ignoring flags %#x.\n", flags);
3842     }
3843
3844     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3845     {
3846         ERR("Not supported on swapchain surfaces.\n");
3847         return WINEDDERR_NOTFLIPPABLE;
3848     }
3849
3850     /* Flipping is only supported on render targets and overlays. */
3851     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3852     {
3853         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3854         return WINEDDERR_NOTFLIPPABLE;
3855     }
3856
3857     flip_surface(surface, override);
3858
3859     /* Update overlays if they're visible. */
3860     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3861         return surface->surface_ops->surface_draw_overlay(surface);
3862
3863     return WINED3D_OK;
3864 }
3865
3866 /* Do not call while under the GL lock. */
3867 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3868 {
3869     struct wined3d_device *device = surface->resource.device;
3870
3871     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3872
3873     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3874     {
3875         struct wined3d_texture *texture = surface->container.u.texture;
3876
3877         TRACE("Passing to container (%p).\n", texture);
3878         texture->texture_ops->texture_preload(texture, srgb);
3879     }
3880     else
3881     {
3882         struct wined3d_context *context;
3883
3884         TRACE("(%p) : About to load surface\n", surface);
3885
3886         /* TODO: Use already acquired context when possible. */
3887         context = context_acquire(device, NULL);
3888
3889         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3890
3891         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3892         {
3893             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3894             GLclampf tmp;
3895             tmp = 0.9f;
3896             ENTER_GL();
3897             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3898             LEAVE_GL();
3899         }
3900
3901         context_release(context);
3902     }
3903 }
3904
3905 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3906 {
3907     if (!surface->resource.allocatedMemory)
3908     {
3909         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3910                 surface->resource.size + RESOURCE_ALIGNMENT);
3911         if (!surface->resource.heapMemory)
3912         {
3913             ERR("Out of memory\n");
3914             return FALSE;
3915         }
3916         surface->resource.allocatedMemory =
3917             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3918     }
3919     else
3920     {
3921         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3922     }
3923
3924     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3925
3926     return TRUE;
3927 }
3928
3929 /* Read the framebuffer back into the surface */
3930 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3931 {
3932     struct wined3d_device *device = surface->resource.device;
3933     const struct wined3d_gl_info *gl_info;
3934     struct wined3d_context *context;
3935     BYTE *mem;
3936     GLint fmt;
3937     GLint type;
3938     BYTE *row, *top, *bottom;
3939     int i;
3940     BOOL bpp;
3941     RECT local_rect;
3942     BOOL srcIsUpsideDown;
3943     GLint rowLen = 0;
3944     GLint skipPix = 0;
3945     GLint skipRow = 0;
3946
3947     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3948         static BOOL warned = FALSE;
3949         if(!warned) {
3950             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3951             warned = TRUE;
3952         }
3953         return;
3954     }
3955
3956     context = context_acquire(device, surface);
3957     context_apply_blit_state(context, device);
3958     gl_info = context->gl_info;
3959
3960     ENTER_GL();
3961
3962     /* Select the correct read buffer, and give some debug output.
3963      * There is no need to keep track of the current read buffer or reset it, every part of the code
3964      * that reads sets the read buffer as desired.
3965      */
3966     if (surface_is_offscreen(surface))
3967     {
3968         /* Mapping the primary render target which is not on a swapchain.
3969          * Read from the back buffer. */
3970         TRACE("Mapping offscreen render target.\n");
3971         glReadBuffer(device->offscreenBuffer);
3972         srcIsUpsideDown = TRUE;
3973     }
3974     else
3975     {
3976         /* Onscreen surfaces are always part of a swapchain */
3977         GLenum buffer = surface_get_gl_buffer(surface);
3978         TRACE("Mapping %#x buffer.\n", buffer);
3979         glReadBuffer(buffer);
3980         checkGLcall("glReadBuffer");
3981         srcIsUpsideDown = FALSE;
3982     }
3983
3984     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3985     if (!rect)
3986     {
3987         local_rect.left = 0;
3988         local_rect.top = 0;
3989         local_rect.right = surface->resource.width;
3990         local_rect.bottom = surface->resource.height;
3991     }
3992     else
3993     {
3994         local_rect = *rect;
3995     }
3996     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3997
3998     switch (surface->resource.format->id)
3999     {
4000         case WINED3DFMT_P8_UINT:
4001         {
4002             if (primary_render_target_is_p8(device))
4003             {
4004                 /* In case of P8 render targets the index is stored in the alpha component */
4005                 fmt = GL_ALPHA;
4006                 type = GL_UNSIGNED_BYTE;
4007                 mem = dest;
4008                 bpp = surface->resource.format->byte_count;
4009             }
4010             else
4011             {
4012                 /* GL can't return palettized data, so read ARGB pixels into a
4013                  * separate block of memory and convert them into palettized format
4014                  * in software. Slow, but if the app means to use palettized render
4015                  * targets and locks it...
4016                  *
4017                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4018                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4019                  * for the color channels when palettizing the colors.
4020                  */
4021                 fmt = GL_RGB;
4022                 type = GL_UNSIGNED_BYTE;
4023                 pitch *= 3;
4024                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4025                 if (!mem)
4026                 {
4027                     ERR("Out of memory\n");
4028                     LEAVE_GL();
4029                     return;
4030                 }
4031                 bpp = surface->resource.format->byte_count * 3;
4032             }
4033         }
4034         break;
4035
4036         default:
4037             mem = dest;
4038             fmt = surface->resource.format->glFormat;
4039             type = surface->resource.format->glType;
4040             bpp = surface->resource.format->byte_count;
4041     }
4042
4043     if (surface->flags & SFLAG_PBO)
4044     {
4045         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4046         checkGLcall("glBindBufferARB");
4047         if (mem)
4048         {
4049             ERR("mem not null for pbo -- unexpected\n");
4050             mem = NULL;
4051         }
4052     }
4053
4054     /* Save old pixel store pack state */
4055     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4056     checkGLcall("glGetIntegerv");
4057     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4058     checkGLcall("glGetIntegerv");
4059     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4060     checkGLcall("glGetIntegerv");
4061
4062     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4063     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4064     checkGLcall("glPixelStorei");
4065     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4066     checkGLcall("glPixelStorei");
4067     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4068     checkGLcall("glPixelStorei");
4069
4070     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4071             local_rect.right - local_rect.left,
4072             local_rect.bottom - local_rect.top,
4073             fmt, type, mem);
4074     checkGLcall("glReadPixels");
4075
4076     /* Reset previous pixel store pack state */
4077     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4078     checkGLcall("glPixelStorei");
4079     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4080     checkGLcall("glPixelStorei");
4081     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4082     checkGLcall("glPixelStorei");
4083
4084     if (surface->flags & SFLAG_PBO)
4085     {
4086         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4087         checkGLcall("glBindBufferARB");
4088
4089         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4090          * to get a pointer to it and perform the flipping in software. This is a lot
4091          * faster than calling glReadPixels for each line. In case we want more speed
4092          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4093         if (!srcIsUpsideDown)
4094         {
4095             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4096             checkGLcall("glBindBufferARB");
4097
4098             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4099             checkGLcall("glMapBufferARB");
4100         }
4101     }
4102
4103     /* TODO: Merge this with the palettization loop below for P8 targets */
4104     if(!srcIsUpsideDown) {
4105         UINT len, off;
4106         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4107             Flip the lines in software */
4108         len = (local_rect.right - local_rect.left) * bpp;
4109         off = local_rect.left * bpp;
4110
4111         row = HeapAlloc(GetProcessHeap(), 0, len);
4112         if(!row) {
4113             ERR("Out of memory\n");
4114             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4115                 HeapFree(GetProcessHeap(), 0, mem);
4116             LEAVE_GL();
4117             return;
4118         }
4119
4120         top = mem + pitch * local_rect.top;
4121         bottom = mem + pitch * (local_rect.bottom - 1);
4122         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4123             memcpy(row, top + off, len);
4124             memcpy(top + off, bottom + off, len);
4125             memcpy(bottom + off, row, len);
4126             top += pitch;
4127             bottom -= pitch;
4128         }
4129         HeapFree(GetProcessHeap(), 0, row);
4130
4131         /* Unmap the temp PBO buffer */
4132         if (surface->flags & SFLAG_PBO)
4133         {
4134             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4135             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4136         }
4137     }
4138
4139     LEAVE_GL();
4140     context_release(context);
4141
4142     /* For P8 textures we need to perform an inverse palette lookup. This is
4143      * done by searching for a palette index which matches the RGB value.
4144      * Note this isn't guaranteed to work when there are multiple entries for
4145      * the same color but we have no choice. In case of P8 render targets,
4146      * the index is stored in the alpha component so no conversion is needed. */
4147     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4148     {
4149         const PALETTEENTRY *pal = NULL;
4150         DWORD width = pitch / 3;
4151         int x, y, c;
4152
4153         if (surface->palette)
4154         {
4155             pal = surface->palette->palents;
4156         }
4157         else
4158         {
4159             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4160             HeapFree(GetProcessHeap(), 0, mem);
4161             return;
4162         }
4163
4164         for(y = local_rect.top; y < local_rect.bottom; y++) {
4165             for(x = local_rect.left; x < local_rect.right; x++) {
4166                 /*                      start              lines            pixels      */
4167                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4168                 const BYTE *green = blue  + 1;
4169                 const BYTE *red = green + 1;
4170
4171                 for(c = 0; c < 256; c++) {
4172                     if(*red   == pal[c].peRed   &&
4173                        *green == pal[c].peGreen &&
4174                        *blue  == pal[c].peBlue)
4175                     {
4176                         *((BYTE *) dest + y * width + x) = c;
4177                         break;
4178                     }
4179                 }
4180             }
4181         }
4182         HeapFree(GetProcessHeap(), 0, mem);
4183     }
4184 }
4185
4186 /* Read the framebuffer contents into a texture */
4187 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
4188 {
4189     struct wined3d_device *device = surface->resource.device;
4190     struct wined3d_context *context;
4191
4192     if (!surface_is_offscreen(surface))
4193     {
4194         /* We would need to flip onscreen surfaces, but there's no efficient
4195          * way to do that here. It makes more sense for the caller to
4196          * explicitly go through sysmem. */
4197         ERR("Not supported for onscreen targets.\n");
4198         return;
4199     }
4200
4201     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4202      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4203      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4204      */
4205     context = context_acquire(device, surface);
4206     device_invalidate_state(device, STATE_FRAMEBUFFER);
4207
4208     surface_prepare_texture(surface, context, srgb);
4209     surface_bind_and_dirtify(surface, context, srgb);
4210
4211     TRACE("Reading back offscreen render target %p.\n", surface);
4212
4213     ENTER_GL();
4214
4215     glReadBuffer(device->offscreenBuffer);
4216     checkGLcall("glReadBuffer");
4217
4218     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4219             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4220     checkGLcall("glCopyTexSubImage2D");
4221
4222     LEAVE_GL();
4223
4224     context_release(context);
4225 }
4226
4227 /* Context activation is done by the caller. */
4228 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4229         struct wined3d_context *context, BOOL srgb)
4230 {
4231     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4232     CONVERT_TYPES convert;
4233     struct wined3d_format format;
4234
4235     if (surface->flags & alloc_flag) return;
4236
4237     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4238     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4239     else surface->flags &= ~SFLAG_CONVERTED;
4240
4241     surface_bind_and_dirtify(surface, context, srgb);
4242     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4243     surface->flags |= alloc_flag;
4244 }
4245
4246 /* Context activation is done by the caller. */
4247 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4248 {
4249     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4250     {
4251         struct wined3d_texture *texture = surface->container.u.texture;
4252         UINT sub_count = texture->level_count * texture->layer_count;
4253         UINT i;
4254
4255         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4256
4257         for (i = 0; i < sub_count; ++i)
4258         {
4259             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4260             surface_prepare_texture_internal(s, context, srgb);
4261         }
4262
4263         return;
4264     }
4265
4266     surface_prepare_texture_internal(surface, context, srgb);
4267 }
4268
4269 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4270 {
4271     if (multisample)
4272     {
4273         if (surface->rb_multisample)
4274             return;
4275
4276         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4277         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4278         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4279                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4280         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4281     }
4282     else
4283     {
4284         if (surface->rb_resolved)
4285             return;
4286
4287         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4288         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4289         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4290                 surface->pow2Width, surface->pow2Height);
4291         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4292     }
4293 }
4294
4295 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4296         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4297 {
4298     struct wined3d_device *device = surface->resource.device;
4299     UINT pitch = wined3d_surface_get_pitch(surface);
4300     const struct wined3d_gl_info *gl_info;
4301     struct wined3d_context *context;
4302     RECT local_rect;
4303     UINT w, h;
4304
4305     surface_get_rect(surface, rect, &local_rect);
4306
4307     mem += local_rect.top * pitch + local_rect.left * bpp;
4308     w = local_rect.right - local_rect.left;
4309     h = local_rect.bottom - local_rect.top;
4310
4311     /* Activate the correct context for the render target */
4312     context = context_acquire(device, surface);
4313     context_apply_blit_state(context, device);
4314     gl_info = context->gl_info;
4315
4316     ENTER_GL();
4317
4318     if (!surface_is_offscreen(surface))
4319     {
4320         GLenum buffer = surface_get_gl_buffer(surface);
4321         TRACE("Unlocking %#x buffer.\n", buffer);
4322         context_set_draw_buffer(context, buffer);
4323
4324         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4325         glPixelZoom(1.0f, -1.0f);
4326     }
4327     else
4328     {
4329         /* Primary offscreen render target */
4330         TRACE("Offscreen render target.\n");
4331         context_set_draw_buffer(context, device->offscreenBuffer);
4332
4333         glPixelZoom(1.0f, 1.0f);
4334     }
4335
4336     glRasterPos3i(local_rect.left, local_rect.top, 1);
4337     checkGLcall("glRasterPos3i");
4338
4339     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4340     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4341
4342     if (surface->flags & SFLAG_PBO)
4343     {
4344         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4345         checkGLcall("glBindBufferARB");
4346     }
4347
4348     glDrawPixels(w, h, fmt, type, mem);
4349     checkGLcall("glDrawPixels");
4350
4351     if (surface->flags & SFLAG_PBO)
4352     {
4353         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4354         checkGLcall("glBindBufferARB");
4355     }
4356
4357     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4358     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4359
4360     LEAVE_GL();
4361
4362     if (wined3d_settings.strict_draw_ordering
4363             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4364             && surface->container.u.swapchain->front_buffer == surface))
4365         wglFlush();
4366
4367     context_release(context);
4368 }
4369
4370 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4371         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4372 {
4373     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4374     const struct wined3d_device *device = surface->resource.device;
4375     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4376     BOOL blit_supported = FALSE;
4377
4378     /* Copy the default values from the surface. Below we might perform fixups */
4379     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4380     *format = *surface->resource.format;
4381     *convert = NO_CONVERSION;
4382
4383     /* Ok, now look if we have to do any conversion */
4384     switch (surface->resource.format->id)
4385     {
4386         case WINED3DFMT_P8_UINT:
4387             /* Below the call to blit_supported is disabled for Wine 1.2
4388              * because the function isn't operating correctly yet. At the
4389              * moment 8-bit blits are handled in software and if certain GL
4390              * extensions are around, surface conversion is performed at
4391              * upload time. The blit_supported call recognizes it as a
4392              * destination fixup. This type of upload 'fixup' and 8-bit to
4393              * 8-bit blits need to be handled by the blit_shader.
4394              * TODO: get rid of this #if 0. */
4395 #if 0
4396             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4397                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4398                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4399 #endif
4400             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4401
4402             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4403              * texturing. Further also use conversion in case of color keying.
4404              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4405              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4406              * conflicts with this.
4407              */
4408             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4409                     || colorkey_active || !use_texturing)
4410             {
4411                 format->glFormat = GL_RGBA;
4412                 format->glInternal = GL_RGBA;
4413                 format->glType = GL_UNSIGNED_BYTE;
4414                 format->conv_byte_count = 4;
4415                 if (colorkey_active)
4416                     *convert = CONVERT_PALETTED_CK;
4417                 else
4418                     *convert = CONVERT_PALETTED;
4419             }
4420             break;
4421
4422         case WINED3DFMT_B2G3R3_UNORM:
4423             /* **********************
4424                 GL_UNSIGNED_BYTE_3_3_2
4425                 ********************** */
4426             if (colorkey_active) {
4427                 /* This texture format will never be used.. So do not care about color keying
4428                     up until the point in time it will be needed :-) */
4429                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4430             }
4431             break;
4432
4433         case WINED3DFMT_B5G6R5_UNORM:
4434             if (colorkey_active)
4435             {
4436                 *convert = CONVERT_CK_565;
4437                 format->glFormat = GL_RGBA;
4438                 format->glInternal = GL_RGB5_A1;
4439                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4440                 format->conv_byte_count = 2;
4441             }
4442             break;
4443
4444         case WINED3DFMT_B5G5R5X1_UNORM:
4445             if (colorkey_active)
4446             {
4447                 *convert = CONVERT_CK_5551;
4448                 format->glFormat = GL_BGRA;
4449                 format->glInternal = GL_RGB5_A1;
4450                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4451                 format->conv_byte_count = 2;
4452             }
4453             break;
4454
4455         case WINED3DFMT_B8G8R8_UNORM:
4456             if (colorkey_active)
4457             {
4458                 *convert = CONVERT_CK_RGB24;
4459                 format->glFormat = GL_RGBA;
4460                 format->glInternal = GL_RGBA8;
4461                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4462                 format->conv_byte_count = 4;
4463             }
4464             break;
4465
4466         case WINED3DFMT_B8G8R8X8_UNORM:
4467             if (colorkey_active)
4468             {
4469                 *convert = CONVERT_RGB32_888;
4470                 format->glFormat = GL_RGBA;
4471                 format->glInternal = GL_RGBA8;
4472                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4473                 format->conv_byte_count = 4;
4474             }
4475             break;
4476
4477         default:
4478             break;
4479     }
4480
4481     return WINED3D_OK;
4482 }
4483
4484 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4485 {
4486     const struct wined3d_device *device = surface->resource.device;
4487     const struct wined3d_palette *pal = surface->palette;
4488     BOOL index_in_alpha = FALSE;
4489     unsigned int i;
4490
4491     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4492      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4493      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4494      * duplicate entries. Store the color key in the unused alpha component to speed the
4495      * download up and to make conversion unneeded. */
4496     index_in_alpha = primary_render_target_is_p8(device);
4497
4498     if (!pal)
4499     {
4500         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4501         if (index_in_alpha)
4502         {
4503             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4504              * there's no palette at this time. */
4505             for (i = 0; i < 256; i++) table[i][3] = i;
4506         }
4507     }
4508     else
4509     {
4510         TRACE("Using surface palette %p\n", pal);
4511         /* Get the surface's palette */
4512         for (i = 0; i < 256; ++i)
4513         {
4514             table[i][0] = pal->palents[i].peRed;
4515             table[i][1] = pal->palents[i].peGreen;
4516             table[i][2] = pal->palents[i].peBlue;
4517
4518             /* When index_in_alpha is set the palette index is stored in the
4519              * alpha component. In case of a readback we can then read
4520              * GL_ALPHA. Color keying is handled in BltOverride using a
4521              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4522              * color key itself is passed to glAlphaFunc in other cases the
4523              * alpha component of pixels that should be masked away is set to 0. */
4524             if (index_in_alpha)
4525             {
4526                 table[i][3] = i;
4527             }
4528             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4529                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4530             {
4531                 table[i][3] = 0x00;
4532             }
4533             else if (pal->flags & WINEDDPCAPS_ALPHA)
4534             {
4535                 table[i][3] = pal->palents[i].peFlags;
4536             }
4537             else
4538             {
4539                 table[i][3] = 0xFF;
4540             }
4541         }
4542     }
4543 }
4544
4545 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4546         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4547 {
4548     const BYTE *source;
4549     BYTE *dest;
4550     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4551
4552     switch (convert) {
4553         case NO_CONVERSION:
4554         {
4555             memcpy(dst, src, pitch * height);
4556             break;
4557         }
4558         case CONVERT_PALETTED:
4559         case CONVERT_PALETTED_CK:
4560         {
4561             BYTE table[256][4];
4562             unsigned int x, y;
4563
4564             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4565
4566             for (y = 0; y < height; y++)
4567             {
4568                 source = src + pitch * y;
4569                 dest = dst + outpitch * y;
4570                 /* This is an 1 bpp format, using the width here is fine */
4571                 for (x = 0; x < width; x++) {
4572                     BYTE color = *source++;
4573                     *dest++ = table[color][0];
4574                     *dest++ = table[color][1];
4575                     *dest++ = table[color][2];
4576                     *dest++ = table[color][3];
4577                 }
4578             }
4579         }
4580         break;
4581
4582         case CONVERT_CK_565:
4583         {
4584             /* Converting the 565 format in 5551 packed to emulate color-keying.
4585
4586               Note : in all these conversion, it would be best to average the averaging
4587                       pixels to get the color of the pixel that will be color-keyed to
4588                       prevent 'color bleeding'. This will be done later on if ever it is
4589                       too visible.
4590
4591               Note2: Nvidia documents say that their driver does not support alpha + color keying
4592                      on the same surface and disables color keying in such a case
4593             */
4594             unsigned int x, y;
4595             const WORD *Source;
4596             WORD *Dest;
4597
4598             TRACE("Color keyed 565\n");
4599
4600             for (y = 0; y < height; y++) {
4601                 Source = (const WORD *)(src + y * pitch);
4602                 Dest = (WORD *) (dst + y * outpitch);
4603                 for (x = 0; x < width; x++ ) {
4604                     WORD color = *Source++;
4605                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4606                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4607                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4608                         *Dest |= 0x0001;
4609                     Dest++;
4610                 }
4611             }
4612         }
4613         break;
4614
4615         case CONVERT_CK_5551:
4616         {
4617             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4618             unsigned int x, y;
4619             const WORD *Source;
4620             WORD *Dest;
4621             TRACE("Color keyed 5551\n");
4622             for (y = 0; y < height; y++) {
4623                 Source = (const WORD *)(src + y * pitch);
4624                 Dest = (WORD *) (dst + y * outpitch);
4625                 for (x = 0; x < width; x++ ) {
4626                     WORD color = *Source++;
4627                     *Dest = color;
4628                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4629                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4630                         *Dest |= (1 << 15);
4631                     else
4632                         *Dest &= ~(1 << 15);
4633                     Dest++;
4634                 }
4635             }
4636         }
4637         break;
4638
4639         case CONVERT_CK_RGB24:
4640         {
4641             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4642             unsigned int x, y;
4643             for (y = 0; y < height; y++)
4644             {
4645                 source = src + pitch * y;
4646                 dest = dst + outpitch * y;
4647                 for (x = 0; x < width; x++) {
4648                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4649                     DWORD dstcolor = color << 8;
4650                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4651                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4652                         dstcolor |= 0xff;
4653                     *(DWORD*)dest = dstcolor;
4654                     source += 3;
4655                     dest += 4;
4656                 }
4657             }
4658         }
4659         break;
4660
4661         case CONVERT_RGB32_888:
4662         {
4663             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4664             unsigned int x, y;
4665             for (y = 0; y < height; y++)
4666             {
4667                 source = src + pitch * y;
4668                 dest = dst + outpitch * y;
4669                 for (x = 0; x < width; x++) {
4670                     DWORD color = 0xffffff & *(const DWORD*)source;
4671                     DWORD dstcolor = color << 8;
4672                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4673                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4674                         dstcolor |= 0xff;
4675                     *(DWORD*)dest = dstcolor;
4676                     source += 4;
4677                     dest += 4;
4678                 }
4679             }
4680         }
4681         break;
4682
4683         default:
4684             ERR("Unsupported conversion type %#x.\n", convert);
4685     }
4686     return WINED3D_OK;
4687 }
4688
4689 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4690 {
4691     /* Flip the surface contents */
4692     /* Flip the DC */
4693     {
4694         HDC tmp;
4695         tmp = front->hDC;
4696         front->hDC = back->hDC;
4697         back->hDC = tmp;
4698     }
4699
4700     /* Flip the DIBsection */
4701     {
4702         HBITMAP tmp;
4703         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4704         tmp = front->dib.DIBsection;
4705         front->dib.DIBsection = back->dib.DIBsection;
4706         back->dib.DIBsection = tmp;
4707
4708         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4709         else front->flags &= ~SFLAG_DIBSECTION;
4710         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4711         else back->flags &= ~SFLAG_DIBSECTION;
4712     }
4713
4714     /* Flip the surface data */
4715     {
4716         void* tmp;
4717
4718         tmp = front->dib.bitmap_data;
4719         front->dib.bitmap_data = back->dib.bitmap_data;
4720         back->dib.bitmap_data = tmp;
4721
4722         tmp = front->resource.allocatedMemory;
4723         front->resource.allocatedMemory = back->resource.allocatedMemory;
4724         back->resource.allocatedMemory = tmp;
4725
4726         tmp = front->resource.heapMemory;
4727         front->resource.heapMemory = back->resource.heapMemory;
4728         back->resource.heapMemory = tmp;
4729     }
4730
4731     /* Flip the PBO */
4732     {
4733         GLuint tmp_pbo = front->pbo;
4734         front->pbo = back->pbo;
4735         back->pbo = tmp_pbo;
4736     }
4737
4738     /* client_memory should not be different, but just in case */
4739     {
4740         BOOL tmp;
4741         tmp = front->dib.client_memory;
4742         front->dib.client_memory = back->dib.client_memory;
4743         back->dib.client_memory = tmp;
4744     }
4745
4746     /* Flip the opengl texture */
4747     {
4748         GLuint tmp;
4749
4750         tmp = back->texture_name;
4751         back->texture_name = front->texture_name;
4752         front->texture_name = tmp;
4753
4754         tmp = back->texture_name_srgb;
4755         back->texture_name_srgb = front->texture_name_srgb;
4756         front->texture_name_srgb = tmp;
4757
4758         tmp = back->rb_multisample;
4759         back->rb_multisample = front->rb_multisample;
4760         front->rb_multisample = tmp;
4761
4762         tmp = back->rb_resolved;
4763         back->rb_resolved = front->rb_resolved;
4764         front->rb_resolved = tmp;
4765
4766         resource_unload(&back->resource);
4767         resource_unload(&front->resource);
4768     }
4769
4770     {
4771         DWORD tmp_flags = back->flags;
4772         back->flags = front->flags;
4773         front->flags = tmp_flags;
4774     }
4775 }
4776
4777 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4778  * pixel copy calls. */
4779 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4780         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4781 {
4782     struct wined3d_device *device = dst_surface->resource.device;
4783     float xrel, yrel;
4784     UINT row;
4785     struct wined3d_context *context;
4786     BOOL upsidedown = FALSE;
4787     RECT dst_rect = *dst_rect_in;
4788
4789     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4790      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4791      */
4792     if(dst_rect.top > dst_rect.bottom) {
4793         UINT tmp = dst_rect.bottom;
4794         dst_rect.bottom = dst_rect.top;
4795         dst_rect.top = tmp;
4796         upsidedown = TRUE;
4797     }
4798
4799     context = context_acquire(device, src_surface);
4800     context_apply_blit_state(context, device);
4801     surface_internal_preload(dst_surface, SRGB_RGB);
4802     ENTER_GL();
4803
4804     /* Bind the target texture */
4805     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4806     if (surface_is_offscreen(src_surface))
4807     {
4808         TRACE("Reading from an offscreen target\n");
4809         upsidedown = !upsidedown;
4810         glReadBuffer(device->offscreenBuffer);
4811     }
4812     else
4813     {
4814         glReadBuffer(surface_get_gl_buffer(src_surface));
4815     }
4816     checkGLcall("glReadBuffer");
4817
4818     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4819     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4820
4821     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4822     {
4823         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4824
4825         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4826             ERR("Texture filtering not supported in direct blit\n");
4827         }
4828     }
4829     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4830             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4831     {
4832         ERR("Texture filtering not supported in direct blit\n");
4833     }
4834
4835     if (upsidedown
4836             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4837             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4838     {
4839         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4840
4841         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4842                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4843                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4844                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4845     }
4846     else
4847     {
4848         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4849         /* I have to process this row by row to swap the image,
4850          * otherwise it would be upside down, so stretching in y direction
4851          * doesn't cost extra time
4852          *
4853          * However, stretching in x direction can be avoided if not necessary
4854          */
4855         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4856             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4857             {
4858                 /* Well, that stuff works, but it's very slow.
4859                  * find a better way instead
4860                  */
4861                 UINT col;
4862
4863                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4864                 {
4865                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4866                             dst_rect.left + col /* x offset */, row /* y offset */,
4867                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4868                 }
4869             }
4870             else
4871             {
4872                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4873                         dst_rect.left /* x offset */, row /* y offset */,
4874                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4875             }
4876         }
4877     }
4878     checkGLcall("glCopyTexSubImage2D");
4879
4880     LEAVE_GL();
4881     context_release(context);
4882
4883     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4884      * path is never entered
4885      */
4886     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4887 }
4888
4889 /* Uses the hardware to stretch and flip the image.
      *
      * The framebuffer contents of src_surface are copied into a texture, that
      * texture is drawn as a quad of the destination size at the origin of
      * "drawBuffer", and the result is read back into dst_surface's texture
      * with glCopyTexSubImage2D(). When the quad has to be drawn into GL_BACK
      * (no usable aux buffer), the overwritten area is redrawn afterwards from
      * the texture copy taken at the start.
      *
      * dst_surface: surface whose texture receives the result; it is marked
      *              SFLAG_INTEXTURE before returning.
      * src_surface: surface whose framebuffer contents are read.
      * src_rect:    source rectangle.
      * dst_rect_in: destination rectangle; top > bottom toggles the flip flag.
      * Filter:      used for the MAG/MIN filter of the texture that receives
      *              the first framebuffer copy. */
4890 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4891         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4892 {
4893     struct wined3d_device *device = dst_surface->resource.device;
4894     struct wined3d_swapchain *src_swapchain = NULL;
         /* src: texture the stretched quad is drawn from; backup: temporary
          * texture, only generated when noBackBufferBackup is set. */
4895     GLuint src, backup = 0;
4896     float left, right, top, bottom; /* Texture coordinates */
4897     UINT fbwidth = src_surface->resource.width;
4898     UINT fbheight = src_surface->resource.height;
4899     struct wined3d_context *context;
4900     GLenum drawBuffer = GL_BACK;
4901     GLenum texture_target;
4902     BOOL noBackBufferBackup;
4903     BOOL src_offscreen;
4904     BOOL upsidedown = FALSE;
4905     RECT dst_rect = *dst_rect_in;
4906
4907     TRACE("Using hwstretch blit\n");
4908     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4909     context = context_acquire(device, src_surface);
4910     context_apply_blit_state(context, device);
4911     surface_internal_preload(dst_surface, SRGB_RGB);
4912
4913     src_offscreen = surface_is_offscreen(src_surface);
         /* With an offscreen source in FBO mode the source surface's own texture
          * is not used for the framebuffer copy; a temporary one is generated
          * below instead. */
4914     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4915     if (!noBackBufferBackup && !src_surface->texture_name)
4916     {
4917         /* Get it a description */
4918         surface_internal_preload(src_surface, SRGB_RGB);
4919     }
4920     ENTER_GL();
4921
4922     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4923      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4924      */
4925     if (context->aux_buffers >= 2)
4926     {
4927         /* Got more than one aux buffer? Use the 2nd aux buffer */
4928         drawBuffer = GL_AUX1;
4929     }
4930     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4931     {
4932         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4933         drawBuffer = GL_AUX0;
4934     }
4935
4936     if(noBackBufferBackup) {
             /* NOTE(review): no glTexImage2D() call for "backup" is visible in
              * this function before the glCopyTexSubImage2D() below, and
              * GL_TEXTURE_2D is not enabled in this branch - confirm that both
              * are taken care of elsewhere. */
4937         glGenTextures(1, &backup);
4938         checkGLcall("glGenTextures");
4939         context_bind_texture(context, GL_TEXTURE_2D, backup);
4940         texture_target = GL_TEXTURE_2D;
4941     } else {
4942         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4943          * we are reading from the back buffer, the backup can be used as source texture
4944          */
4945         texture_target = src_surface->texture_target;
4946         context_bind_texture(context, texture_target, src_surface->texture_name);
4947         glEnable(texture_target);
4948         checkGLcall("glEnable(texture_target)");
4949
4950         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4951         src_surface->flags &= ~SFLAG_INTEXTURE;
4952     }
4953
4954     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4955      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4956      */
4957     if(dst_rect.top > dst_rect.bottom) {
4958         UINT tmp = dst_rect.bottom;
4959         dst_rect.bottom = dst_rect.top;
4960         dst_rect.top = tmp;
4961         upsidedown = TRUE;
4962     }
4963
4964     if (src_offscreen)
4965     {
4966         TRACE("Reading from an offscreen target\n");
4967         upsidedown = !upsidedown;
4968         glReadBuffer(device->offscreenBuffer);
4969     }
4970     else
4971     {
4972         glReadBuffer(surface_get_gl_buffer(src_surface));
4973     }
4974
         /* Copy the whole framebuffer (fbwidth x fbheight) into the texture
          * bound above: either "backup" or the source surface's texture. */
4975     /* TODO: Only back up the part that will be overwritten */
4976     glCopyTexSubImage2D(texture_target, 0,
4977                         0, 0 /* read offsets */,
4978                         0, 0,
4979                         fbwidth,
4980                         fbheight);
4981
4982     checkGLcall("glCopyTexSubImage2D");
4983
4984     /* No issue with overriding these - the sampler is dirty due to blit usage */
4985     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4986             wined3d_gl_mag_filter(magLookup, Filter));
4987     checkGLcall("glTexParameteri");
4988     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
4989             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
4990     checkGLcall("glTexParameteri");
4991
4992     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4993         src_swapchain = src_surface->container.u.swapchain;
4994     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
4995     {
             /* The copy taken above doubles as the source texture. */
4996         src = backup ? backup : src_surface->texture_name;
4997     }
4998     else
4999     {
             /* Swapchain surface other than back_buffers[0]: read GL_FRONT into
              * a separate temporary texture, which is deleted in the cleanup
              * section at the end. This texture always uses GL_NEAREST. */
5000         glReadBuffer(GL_FRONT);
5001         checkGLcall("glReadBuffer(GL_FRONT)");
5002
5003         glGenTextures(1, &src);
5004         checkGLcall("glGenTextures(1, &src)");
5005         context_bind_texture(context, GL_TEXTURE_2D, src);
5006
5007         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5008          * out for power of 2 sizes
5009          */
5010         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5011                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5012         checkGLcall("glTexImage2D");
5013         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5014                             0, 0 /* read offsets */,
5015                             0, 0,
5016                             fbwidth,
5017                             fbheight);
5018
5019         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5020         checkGLcall("glTexParameteri");
5021         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5022         checkGLcall("glTexParameteri");
5023
5024         glReadBuffer(GL_BACK);
5025         checkGLcall("glReadBuffer(GL_BACK)");
5026
5027         if(texture_target != GL_TEXTURE_2D) {
5028             glDisable(texture_target);
5029             glEnable(GL_TEXTURE_2D);
5030             texture_target = GL_TEXTURE_2D;
5031         }
5032     }
5033     checkGLcall("glEnd and previous");
5034
         /* Source texture coordinates. The vertical pair is swapped when
          * "upsidedown" is set, which flips the drawn quad. */
5035     left = src_rect->left;
5036     right = src_rect->right;
5037
5038     if (!upsidedown)
5039     {
5040         top = src_surface->resource.height - src_rect->top;
5041         bottom = src_surface->resource.height - src_rect->bottom;
5042     }
5043     else
5044     {
5045         top = src_surface->resource.height - src_rect->bottom;
5046         bottom = src_surface->resource.height - src_rect->top;
5047     }
5048
         /* Scale pixel coordinates by the pow2 texture size when the surface
          * has SFLAG_NORMCOORD set. */
5049     if (src_surface->flags & SFLAG_NORMCOORD)
5050     {
5051         left /= src_surface->pow2Width;
5052         right /= src_surface->pow2Width;
5053         top /= src_surface->pow2Height;
5054         bottom /= src_surface->pow2Height;
5055     }
5056
5057     /* draw the source texture stretched and upside down. The correct surface is bound already */
5058     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5059     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5060
         /* Draw into, and subsequently read back from, the same buffer. */
5061     context_set_draw_buffer(context, drawBuffer);
5062     glReadBuffer(drawBuffer);
5063
5064     glBegin(GL_QUADS);
5065         /* bottom left */
5066         glTexCoord2f(left, bottom);
5067         glVertex2i(0, 0);
5068
5069         /* top left */
5070         glTexCoord2f(left, top);
5071         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5072
5073         /* top right */
5074         glTexCoord2f(right, top);
5075         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5076
5077         /* bottom right */
5078         glTexCoord2f(right, bottom);
5079         glVertex2i(dst_rect.right - dst_rect.left, 0);
5080     glEnd();
5081     checkGLcall("glEnd and previous");
5082
5083     if (texture_target != dst_surface->texture_target)
5084     {
5085         glDisable(texture_target);
5086         glEnable(dst_surface->texture_target);
5087         texture_target = dst_surface->texture_target;
5088     }
5089
5090     /* Now read the stretched and upside down image into the destination texture */
         /* NOTE(review): mip level 0 is passed here, whereas
          * fb_copy_to_texture_direct() passes dst_surface->texture_level -
          * confirm this is correct for destination sub-levels. */
5091     context_bind_texture(context, texture_target, dst_surface->texture_name);
5092     glCopyTexSubImage2D(texture_target,
5093                         0,
5094                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5095                         0, 0, /* We blitted the image to the origin */
5096                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5097     checkGLcall("glCopyTexSubImage2D");
5098
         /* Only GL_BACK needs restoring; the aux buffers are scratch space here. */
5099     if(drawBuffer == GL_BACK) {
5100         /* Write the back buffer backup back */
5101         if(backup) {
5102             if(texture_target != GL_TEXTURE_2D) {
5103                 glDisable(texture_target);
5104                 glEnable(GL_TEXTURE_2D);
5105                 texture_target = GL_TEXTURE_2D;
5106             }
5107             context_bind_texture(context, GL_TEXTURE_2D, backup);
5108         }
5109         else
5110         {
5111             if (texture_target != src_surface->texture_target)
5112             {
5113                 glDisable(texture_target);
5114                 glEnable(src_surface->texture_target);
5115                 texture_target = src_surface->texture_target;
5116             }
5117             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5118         }
5119
             /* Full-framebuffer quad. The texture coordinates are always divided
              * by the pow2 size here, independent of SFLAG_NORMCOORD.
              * NOTE(review): confirm that this is right for targets that use
              * unnormalized coordinates. */
5120         glBegin(GL_QUADS);
5121             /* top left */
5122             glTexCoord2f(0.0f, 0.0f);
5123             glVertex2i(0, fbheight);
5124
5125             /* bottom left */
5126             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5127             glVertex2i(0, 0);
5128
5129             /* bottom right */
5130             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5131                     (float)fbheight / (float)src_surface->pow2Height);
5132             glVertex2i(fbwidth, 0);
5133
5134             /* top right */
5135             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5136             glVertex2i(fbwidth, fbheight);
5137         glEnd();
5138     }
5139     glDisable(texture_target);
5140     checkGLcall("glDisable(texture_target)");
5141
5142     /* Cleanup */
         /* "src" is a temporary texture only if it was generated for the front
          * buffer copy above; never delete the surface's own texture here. */
5143     if (src != src_surface->texture_name && src != backup)
5144     {
5145         glDeleteTextures(1, &src);
5146         checkGLcall("glDeleteTextures(1, &src)");
5147     }
5148     if(backup) {
5149         glDeleteTextures(1, &backup);
5150         checkGLcall("glDeleteTextures(1, &backup)");
5151     }
5152
5153     LEAVE_GL();
5154
5155     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5156
5157     context_release(context);
5158
5159     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5160      * path is never entered
5161      */
5162     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5163 }
5164
5165 /* Front buffer coordinates are always full screen coordinates, but our GL
5166  * drawable is limited to the window's client area. The sysmem and texture
5167  * copies do have the full screen size. Note that GL has a bottom-left
5168  * origin, while D3D has a top-left origin. */
5169 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5170 {
5171     UINT drawable_height;
5172
5173     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5174             && surface == surface->container.u.swapchain->front_buffer)
5175     {
5176         POINT offset = {0, 0};
5177         RECT windowsize;
5178
5179         ScreenToClient(window, &offset);
5180         OffsetRect(rect, offset.x, offset.y);
5181
5182         GetClientRect(window, &windowsize);
5183         drawable_height = windowsize.bottom - windowsize.top;
5184     }
5185     else
5186     {
5187         drawable_height = surface->resource.height;
5188     }
5189
5190     rect->top = drawable_height - rect->top;
5191     rect->bottom = drawable_height - rect->bottom;
5192 }
5193
5194 static void surface_blt_to_drawable(struct wined3d_device *device,
5195         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5196         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5197         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5198 {
5199     struct wined3d_context *context;
5200     RECT src_rect, dst_rect;
5201
5202     src_rect = *src_rect_in;
5203     dst_rect = *dst_rect_in;
5204
5205     /* Make sure the surface is up-to-date. This should probably use
5206      * surface_load_location() and worry about the destination surface too,
5207      * unless we're overwriting it completely. */
5208     surface_internal_preload(src_surface, SRGB_RGB);
5209
5210     /* Activate the destination context, set it up for blitting */
5211     context = context_acquire(device, dst_surface);
5212     context_apply_blit_state(context, device);
5213
5214     if (!surface_is_offscreen(dst_surface))
5215         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5216
5217     device->blitter->set_shader(device->blit_priv, context, src_surface);
5218
5219     ENTER_GL();
5220
5221     if (color_key)
5222     {
5223         glEnable(GL_ALPHA_TEST);
5224         checkGLcall("glEnable(GL_ALPHA_TEST)");
5225
5226         /* When the primary render target uses P8, the alpha component
5227          * contains the palette index. Which means that the colorkey is one of
5228          * the palette entries. In other cases pixels that should be masked
5229          * away have alpha set to 0. */
5230         if (primary_render_target_is_p8(device))
5231             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5232         else
5233             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5234         checkGLcall("glAlphaFunc");
5235     }
5236     else
5237     {
5238         glDisable(GL_ALPHA_TEST);
5239         checkGLcall("glDisable(GL_ALPHA_TEST)");
5240     }
5241
5242     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5243
5244     if (color_key)
5245     {
5246         glDisable(GL_ALPHA_TEST);
5247         checkGLcall("glDisable(GL_ALPHA_TEST)");
5248     }
5249
5250     LEAVE_GL();
5251
5252     /* Leave the opengl state valid for blitting */
5253     device->blitter->unset_shader(context->gl_info);
5254
5255     if (wined3d_settings.strict_draw_ordering
5256             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5257             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5258         wglFlush(); /* Flush to ensure ordering across contexts. */
5259
5260     context_release(context);
5261 }
5262
5263 /* Do not call while under the GL lock. */
5264 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5265 {
5266     struct wined3d_device *device = s->resource.device;
5267     const struct blit_shader *blitter;
5268
5269     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5270             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5271     if (!blitter)
5272     {
5273         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5274         return WINED3DERR_INVALIDCALL;
5275     }
5276
5277     return blitter->color_fill(device, s, rect, color);
5278 }
5279
5280 /* Do not call while under the GL lock. */
5281 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5282         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5283         WINED3DTEXTUREFILTERTYPE Filter)
5284 {
5285     struct wined3d_device *device = dst_surface->resource.device;
5286     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5287     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5288
5289     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5290             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5291             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5292
5293     /* Get the swapchain. One of the surfaces has to be a primary surface */
5294     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5295     {
5296         WARN("Destination is in sysmem, rejecting gl blt\n");
5297         return WINED3DERR_INVALIDCALL;
5298     }
5299
5300     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5301         dstSwapchain = dst_surface->container.u.swapchain;
5302
5303     if (src_surface)
5304     {
5305         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5306         {
5307             WARN("Src is in sysmem, rejecting gl blt\n");
5308             return WINED3DERR_INVALIDCALL;
5309         }
5310
5311         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5312             srcSwapchain = src_surface->container.u.swapchain;
5313     }
5314
5315     /* Early sort out of cases where no render target is used */
5316     if (!dstSwapchain && !srcSwapchain
5317             && src_surface != device->fb.render_targets[0]
5318             && dst_surface != device->fb.render_targets[0])
5319     {
5320         TRACE("No surface is render target, not using hardware blit.\n");
5321         return WINED3DERR_INVALIDCALL;
5322     }
5323
5324     /* No destination color keying supported */
5325     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5326     {
5327         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5328         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5329         return WINED3DERR_INVALIDCALL;
5330     }
5331
5332     if (dstSwapchain && dstSwapchain == srcSwapchain)
5333     {
5334         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5335         return WINED3DERR_INVALIDCALL;
5336     }
5337
5338     if (dstSwapchain && srcSwapchain)
5339     {
5340         FIXME("Implement hardware blit between two different swapchains\n");
5341         return WINED3DERR_INVALIDCALL;
5342     }
5343
5344     if (dstSwapchain)
5345     {
5346         /* Handled with regular texture -> swapchain blit */
5347         if (src_surface == device->fb.render_targets[0])
5348             TRACE("Blit from active render target to a swapchain\n");
5349     }
5350     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5351     {
5352         FIXME("Implement blit from a swapchain to the active render target\n");
5353         return WINED3DERR_INVALIDCALL;
5354     }
5355
5356     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5357     {
5358         /* Blit from render target to texture */
5359         BOOL stretchx;
5360
5361         /* P8 read back is not implemented */
5362         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5363                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5364         {
5365             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5366             return WINED3DERR_INVALIDCALL;
5367         }
5368
5369         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5370         {
5371             TRACE("Color keying not supported by frame buffer to texture blit\n");
5372             return WINED3DERR_INVALIDCALL;
5373             /* Destination color key is checked above */
5374         }
5375
5376         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5377             stretchx = TRUE;
5378         else
5379             stretchx = FALSE;
5380
5381         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5382          * flip the image nor scale it.
5383          *
5384          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5385          * -> If the app wants a image width an unscaled width, copy it line per line
5386          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5387          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5388          *    back buffer. This is slower than reading line per line, thus not used for flipping
5389          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5390          *    pixel by pixel. */
5391         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5392                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5393         {
5394             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5395             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5396         } else {
5397             TRACE("Using hardware stretching to flip / stretch the texture\n");
5398             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5399         }
5400
5401         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5402         {
5403             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5404             dst_surface->resource.allocatedMemory = NULL;
5405             dst_surface->resource.heapMemory = NULL;
5406         }
5407         else
5408         {
5409             dst_surface->flags &= ~SFLAG_INSYSMEM;
5410         }
5411
5412         return WINED3D_OK;
5413     }
5414     else if (src_surface)
5415     {
5416         /* Blit from offscreen surface to render target */
5417         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5418         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5419
5420         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5421
5422         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5423                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5424                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5425         {
5426             FIXME("Unsupported blit operation falling back to software\n");
5427             return WINED3DERR_INVALIDCALL;
5428         }
5429
5430         /* Color keying: Check if we have to do a color keyed blt,
5431          * and if not check if a color key is activated.
5432          *
5433          * Just modify the color keying parameters in the surface and restore them afterwards
5434          * The surface keeps track of the color key last used to load the opengl surface.
5435          * PreLoad will catch the change to the flags and color key and reload if necessary.
5436          */
5437         if (flags & WINEDDBLT_KEYSRC)
5438         {
5439             /* Use color key from surface */
5440         }
5441         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5442         {
5443             /* Use color key from DDBltFx */
5444             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5445             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5446         }
5447         else
5448         {
5449             /* Do not use color key */
5450             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5451         }
5452
5453         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5454                 src_surface, src_rect, dst_surface, dst_rect);
5455
5456         /* Restore the color key parameters */
5457         src_surface->CKeyFlags = oldCKeyFlags;
5458         src_surface->SrcBltCKey = oldBltCKey;
5459
5460         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5461
5462         return WINED3D_OK;
5463     }
5464
5465     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5466     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5467     return WINED3DERR_INVALIDCALL;
5468 }
5469
/* Draw a quad covering the viewport (x, y, w, h) that samples "texture"
 * through the shader backend's depth blt program.
 *
 * Colour writes are masked off, the stencil test is disabled and the depth
 * test is set to always pass with depth writes enabled. The enable bits,
 * depth / colour buffer state and viewport are saved with glPushAttrib() and
 * restored with glPopAttrib(); the texture binding and the texture compare
 * mode are restored by hand.
 *
 * surface: supplies the device, the pow2 dimensions passed to
 *          surface_get_blt_info() and ds_current_size for the shader backend.
 * texture: GL texture name to bind on texture unit 0.
 * target:  texture target handed to surface_get_blt_info().
 *
 * GL locking is done by the caller */
static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
        GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
{
    struct wined3d_device *device = surface->resource.device;
    const struct wined3d_gl_info *gl_info = context->gl_info;
    GLint compare_mode = GL_NONE;
    struct blt_info info;
    GLint old_binding = 0;
    RECT rect;

    glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);

    glDisable(GL_CULL_FACE);
    glDisable(GL_BLEND);
    glDisable(GL_ALPHA_TEST);
    glDisable(GL_SCISSOR_TEST);
    glDisable(GL_STENCIL_TEST);
    glEnable(GL_DEPTH_TEST);
    glDepthFunc(GL_ALWAYS);
    glDepthMask(GL_TRUE);
    glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
    glViewport(x, y, w, h);

    /* Note that the rectangle is given with top = h and bottom = 0. */
    SetRect(&rect, 0, h, w, 0);
    surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
    context_active_texture(context, context->gl_info, 0);
    /* Remember the current binding so it can be put back after the draw. */
    glGetIntegerv(info.binding, &old_binding);
    glBindTexture(info.bind_target, texture);
    if (gl_info->supported[ARB_SHADOW])
    {
        /* Temporarily switch off texture comparison if it is enabled on
         * this texture; it is re-enabled after the draw. */
        glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
        if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
    }

    device->shader_backend->shader_select_depth_blt(device->shader_priv,
            gl_info, info.tex_type, &surface->ds_current_size);

    glBegin(GL_TRIANGLE_STRIP);
    glTexCoord3fv(info.coords[0]);
    glVertex2f(-1.0f, -1.0f);
    glTexCoord3fv(info.coords[1]);
    glVertex2f(1.0f, -1.0f);
    glTexCoord3fv(info.coords[2]);
    glVertex2f(-1.0f, 1.0f);
    glTexCoord3fv(info.coords[3]);
    glVertex2f(1.0f, 1.0f);
    glEnd();

    /* compare_mode stays GL_NONE unless it was queried and found enabled above. */
    if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
    glBindTexture(info.bind_target, old_binding);

    glPopAttrib();

    device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
}
5526
5527 void surface_modify_ds_location(struct wined3d_surface *surface,
5528         DWORD location, UINT w, UINT h)
5529 {
5530     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5531
5532     if (location & ~SFLAG_DS_LOCATIONS)
5533         FIXME("Invalid location (%#x) specified.\n", location);
5534
5535     surface->ds_current_size.cx = w;
5536     surface->ds_current_size.cy = h;
5537     surface->flags &= ~SFLAG_DS_LOCATIONS;
5538     surface->flags |= location;
5539 }
5540
/* Bring the depth/stencil contents of "surface" up to date in "location",
 * which is expected to be SFLAG_DS_OFFSCREEN or SFLAG_DS_ONSCREEN, by
 * copying the depth data from the other location with surface_depth_blt().
 *
 * This is only implemented for ORM_FBO; with any other offscreen rendering
 * mode the function returns without doing anything. On the paths that reach
 * the end of the function the location flag is set and ds_current_size is
 * set to the full resource size.
 *
 * Context activation is done by the caller. */
void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
{
    struct wined3d_device *device = surface->resource.device;
    GLsizei w, h;

    TRACE("surface %p, new location %#x.\n", surface, location);

    /* TODO: Make this work for modes other than FBO */
    if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;

    if (!(surface->flags & location))
    {
        /* The requested location is not current: copy the area recorded in
         * ds_current_size, and zero that size so the "already up to date"
         * test below compares against 0 x 0. */
        w = surface->ds_current_size.cx;
        h = surface->ds_current_size.cy;
        surface->ds_current_size.cx = 0;
        surface->ds_current_size.cy = 0;
    }
    else
    {
        /* The location flag is already set; use the full surface size. */
        w = surface->resource.width;
        h = surface->resource.height;
    }

    if (surface->ds_current_size.cx == surface->resource.width
            && surface->ds_current_size.cy == surface->resource.height)
    {
        TRACE("Location (%#x) is already up to date.\n", location);
        return;
    }

    if (surface->current_renderbuffer)
    {
        FIXME("Not supported with fixed up depth stencil.\n");
        return;
    }

    if (!(surface->flags & SFLAG_DS_LOCATIONS))
    {
        /* This mostly happens when a depth / stencil is used without being
         * cleared first. In principle we could upload from sysmem, or
         * explicitly clear before first usage. For the moment there don't
         * appear to be a lot of applications depending on this, so a FIXME
         * should do. */
        FIXME("No up to date depth stencil location.\n");
        surface->flags |= location;
        surface->ds_current_size.cx = surface->resource.width;
        surface->ds_current_size.cy = surface->resource.height;
        return;
    }

    if (location == SFLAG_DS_OFFSCREEN)
    {
        GLint old_binding = 0;
        GLenum bind_target;

        /* The render target is allowed to be smaller than the depth/stencil
         * buffer, so the onscreen depth/stencil buffer is potentially smaller
         * than the offscreen surface. Don't overwrite the offscreen surface
         * with undefined data. */
        w = min(w, context->swapchain->presentParms.BackBufferWidth);
        h = min(h, context->swapchain->presentParms.BackBufferHeight);

        TRACE("Copying onscreen depth buffer to depth texture.\n");

        ENTER_GL();

        /* The intermediate texture is created on first use and kept on the
         * device. */
        if (!device->depth_blt_texture)
        {
            glGenTextures(1, &device->depth_blt_texture);
        }

        /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
         * directly on the FBO texture. That's because we need to flip. */
        context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
                context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
        if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
        {
            glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
            bind_target = GL_TEXTURE_RECTANGLE_ARB;
        }
        else
        {
            glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
            bind_target = GL_TEXTURE_2D;
        }
        glBindTexture(bind_target, device->depth_blt_texture);
        /* We use GL_DEPTH_COMPONENT instead of the surface's specific
         * internal format, because the internal format might include stencil
         * data. In principle we should copy stencil data as well, but unless
         * the driver supports stencil export it's hard to do, and doesn't
         * seem to be needed in practice. If the hardware doesn't support
         * writing stencil data, the glCopyTexImage2D() call might trigger
         * software fallbacks. */
        glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
        glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
        glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
        /* Put back whatever texture was bound before the copy. */
        glBindTexture(bind_target, old_binding);

        /* Set up an FBO with the surface as depth/stencil attachment and no
         * colour draw / read buffer. */
        context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
                NULL, surface, SFLAG_INTEXTURE);
        context_set_draw_buffer(context, GL_NONE);
        glReadBuffer(GL_NONE);

        /* Do the actual blit */
        surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
        checkGLcall("depth_blt");

        context_invalidate_state(context, STATE_FRAMEBUFFER);

        LEAVE_GL();

        if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
    }
    else if (location == SFLAG_DS_ONSCREEN)
    {
        TRACE("Copying depth texture to onscreen depth buffer.\n");

        ENTER_GL();

        context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
                context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
        /* The viewport's y origin is offset by the pow2 height minus h. */
        surface_depth_blt(surface, context, surface->texture_name,
                0, surface->pow2Height - h, w, h, surface->texture_target);
        checkGLcall("depth_blt");

        context_invalidate_state(context, STATE_FRAMEBUFFER);

        LEAVE_GL();

        if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
    }
    else
    {
        ERR("Invalid location (%#x) specified.\n", location);
    }

    /* NOTE(review): this is also reached after the "Invalid location" ERR
     * above, so an invalid location value still gets or'ed into the flags. */
    surface->flags |= location;
    surface->ds_current_size.cx = surface->resource.width;
    surface->ds_current_size.cy = surface->resource.height;
}
5686
5687 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5688 {
5689     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5690     struct wined3d_surface *overlay;
5691
5692     TRACE("surface %p, location %s, persistent %#x.\n",
5693             surface, debug_surflocation(location), persistent);
5694
5695     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5696             && (location & SFLAG_INDRAWABLE))
5697         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5698
5699     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5700             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5701         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5702
5703     if (persistent)
5704     {
5705         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5706                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5707         {
5708             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5709             {
5710                 TRACE("Passing to container.\n");
5711                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5712             }
5713         }
5714         surface->flags &= ~SFLAG_LOCATIONS;
5715         surface->flags |= location;
5716
5717         /* Redraw emulated overlays, if any */
5718         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5719         {
5720             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5721             {
5722                 overlay->surface_ops->surface_draw_overlay(overlay);
5723             }
5724         }
5725     }
5726     else
5727     {
5728         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5729         {
5730             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5731             {
5732                 TRACE("Passing to container\n");
5733                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5734             }
5735         }
5736         surface->flags &= ~location;
5737     }
5738
5739     if (!(surface->flags & SFLAG_LOCATIONS))
5740     {
5741         ERR("Surface %p does not have any up to date location.\n", surface);
5742     }
5743 }
5744
5745 static DWORD resource_access_from_location(DWORD location)
5746 {
5747     switch (location)
5748     {
5749         case SFLAG_INSYSMEM:
5750             return WINED3D_RESOURCE_ACCESS_CPU;
5751
5752         case SFLAG_INDRAWABLE:
5753         case SFLAG_INSRGBTEX:
5754         case SFLAG_INTEXTURE:
5755         case SFLAG_INRB_MULTISAMPLE:
5756         case SFLAG_INRB_RESOLVED:
5757             return WINED3D_RESOURCE_ACCESS_GPU;
5758
5759         default:
5760             FIXME("Unhandled location %#x.\n", location);
5761             return 0;
5762     }
5763 }
5764
5765 static void surface_load_sysmem(struct wined3d_surface *surface,
5766         const struct wined3d_gl_info *gl_info, const RECT *rect)
5767 {
5768     surface_prepare_system_memory(surface);
5769
5770     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5771         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5772
5773     /* Download the surface to system memory. */
5774     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5775     {
5776         struct wined3d_device *device = surface->resource.device;
5777         struct wined3d_context *context;
5778
5779         /* TODO: Use already acquired context when possible. */
5780         context = context_acquire(device, NULL);
5781
5782         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5783         surface_download_data(surface, gl_info);
5784
5785         context_release(context);
5786
5787         return;
5788     }
5789
5790     if (surface->flags & SFLAG_INDRAWABLE)
5791     {
5792         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5793                 wined3d_surface_get_pitch(surface));
5794         return;
5795     }
5796
5797     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5798             surface, surface->flags & SFLAG_LOCATIONS);
5799 }
5800
/* Load the surface contents into the drawable (SFLAG_INDRAWABLE).
 *
 * If the RGB texture is current it is blitted to the drawable; otherwise the
 * data in resource.allocatedMemory is drawn with
 * flush_to_framebuffer_drawpixels(), after converting it into a temporary
 * buffer when d3dfmt_get_conv() asks for a conversion.
 *
 * Returns WINED3DERR_INVALIDCALL for an offscreen surface in ORM_FBO mode,
 * E_OUTOFMEMORY if the conversion buffer can't be allocated, and WINED3D_OK
 * otherwise. */
static HRESULT surface_load_drawable(struct wined3d_surface *surface,
        const struct wined3d_gl_info *gl_info, const RECT *rect)
{
    struct wined3d_device *device = surface->resource.device;
    struct wined3d_format format;
    CONVERT_TYPES convert;
    UINT byte_count;
    BYTE *mem;

    if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
    {
        ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
        return WINED3DERR_INVALIDCALL;
    }

    /* With RTL_READTEX, make the texture current first so the texture blit
     * path below can be taken. */
    if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
        surface_load_location(surface, SFLAG_INTEXTURE, NULL);

    if (surface->flags & SFLAG_INTEXTURE)
    {
        RECT r;

        surface_get_rect(surface, rect, &r);
        surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);

        return WINED3D_OK;
    }

    if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
    {
        /* This needs colorspace conversion from sRGB to RGB. We take the slow
         * path through sysmem. */
        surface_load_location(surface, SFLAG_INSYSMEM, rect);
    }

    d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);

    /* Don't use PBOs for converted surfaces. During PBO conversion we look at
     * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
     * called. */
    if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
    {
        struct wined3d_context *context;

        TRACE("Removing the pbo attached to surface %p.\n", surface);

        /* TODO: Use already acquired context when possible. */
        context = context_acquire(device, NULL);

        surface_remove_pbo(surface, gl_info);

        context_release(context);
    }

    if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
    {
        UINT height = surface->resource.height;
        UINT width = surface->resource.width;
        UINT src_pitch, dst_pitch;

        byte_count = format.conv_byte_count;
        src_pitch = wined3d_surface_get_pitch(surface);

        /* Stick to the alignment for the converted surface too, makes it
         * easier to load the surface. */
        dst_pitch = width * byte_count;
        dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);

        /* Temporary buffer for the converted data; freed at the end of the
         * function. */
        if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
        {
            ERR("Out of memory (%u).\n", dst_pitch * height);
            return E_OUTOFMEMORY;
        }

        d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
                src_pitch, width, height, dst_pitch, convert, surface);

        surface->flags |= SFLAG_CONVERTED;
    }
    else
    {
        /* No conversion: draw straight from the surface's own memory. */
        surface->flags &= ~SFLAG_CONVERTED;
        mem = surface->resource.allocatedMemory;
        byte_count = format.byte_count;
    }

    flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);

    /* Don't delete PBO memory. */
    if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
        HeapFree(GetProcessHeap(), 0, mem);

    return WINED3D_OK;
}
5895
5896 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5897         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5898 {
5899     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
5900     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5901     struct wined3d_device *device = surface->resource.device;
5902     struct wined3d_context *context;
5903     UINT width, src_pitch, dst_pitch;
5904     struct wined3d_bo_address data;
5905     struct wined3d_format format;
5906     POINT dst_point = {0, 0};
5907     CONVERT_TYPES convert;
5908     BYTE *mem;
5909
5910     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5911             && surface_is_offscreen(surface)
5912             && (surface->flags & SFLAG_INDRAWABLE))
5913     {
5914         read_from_framebuffer_texture(surface, srgb);
5915
5916         return WINED3D_OK;
5917     }
5918
5919     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5920             && (surface->resource.format->flags & attach_flags) == attach_flags
5921             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5922                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5923                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5924     {
5925         if (srgb)
5926             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5927                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5928         else
5929             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5930                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5931
5932         return WINED3D_OK;
5933     }
5934
5935     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
5936             && (surface->resource.format->flags & attach_flags) == attach_flags
5937             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5938                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5939                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5940     {
5941         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
5942         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
5943         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
5944
5945         surface_blt_fbo(device, WINED3DTEXF_POINT, surface, src_location,
5946                 &rect, surface, dst_location, &rect);
5947
5948         return WINED3D_OK;
5949     }
5950
5951     /* Upload from system memory */
5952
5953     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
5954             TRUE /* We will use textures */, &format, &convert);
5955
5956     if (srgb)
5957     {
5958         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
5959         {
5960             /* Performance warning... */
5961             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
5962             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5963         }
5964     }
5965     else
5966     {
5967         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
5968         {
5969             /* Performance warning... */
5970             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
5971             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5972         }
5973     }
5974
5975     if (!(surface->flags & SFLAG_INSYSMEM))
5976     {
5977         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
5978         /* Lets hope we get it from somewhere... */
5979         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5980     }
5981
5982     /* TODO: Use already acquired context when possible. */
5983     context = context_acquire(device, NULL);
5984
5985     surface_prepare_texture(surface, context, srgb);
5986     surface_bind_and_dirtify(surface, context, srgb);
5987
5988     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
5989     {
5990         surface->flags |= SFLAG_GLCKEY;
5991         surface->glCKey = surface->SrcBltCKey;
5992     }
5993     else surface->flags &= ~SFLAG_GLCKEY;
5994
5995     width = surface->resource.width;
5996     src_pitch = wined3d_surface_get_pitch(surface);
5997
5998     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5999      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6000      * called. */
6001     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6002     {
6003         TRACE("Removing the pbo attached to surface %p.\n", surface);
6004         surface_remove_pbo(surface, gl_info);
6005     }
6006
6007     if (format.convert)
6008     {
6009         /* This code is entered for texture formats which need a fixup. */
6010         UINT height = surface->resource.height;
6011
6012         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6013         dst_pitch = width * format.conv_byte_count;
6014         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6015
6016         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6017         {
6018             ERR("Out of memory (%u).\n", dst_pitch * height);
6019             context_release(context);
6020             return E_OUTOFMEMORY;
6021         }
6022         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6023     }
6024     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6025     {
6026         /* This code is only entered for color keying fixups */
6027         UINT height = surface->resource.height;
6028
6029         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6030         dst_pitch = width * format.conv_byte_count;
6031         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6032
6033         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6034         {
6035             ERR("Out of memory (%u).\n", dst_pitch * height);
6036             context_release(context);
6037             return E_OUTOFMEMORY;
6038         }
6039         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6040                 width, height, dst_pitch, convert, surface);
6041     }
6042     else
6043     {
6044         mem = surface->resource.allocatedMemory;
6045     }
6046
6047     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6048     data.addr = mem;
6049     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6050
6051     context_release(context);
6052
6053     /* Don't delete PBO memory. */
6054     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6055         HeapFree(GetProcessHeap(), 0, mem);
6056
6057     return WINED3D_OK;
6058 }
6059
6060 static void surface_multisample_resolve(struct wined3d_surface *surface)
6061 {
6062     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6063
6064     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6065         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6066
6067     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6068             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6069 }
6070
6071 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6072 {
6073     struct wined3d_device *device = surface->resource.device;
6074     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6075     HRESULT hr;
6076
6077     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6078
6079     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6080     {
6081         if (location == SFLAG_INTEXTURE)
6082         {
6083             struct wined3d_context *context = context_acquire(device, NULL);
6084             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6085             context_release(context);
6086             return WINED3D_OK;
6087         }
6088         else
6089         {
6090             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6091             return WINED3DERR_INVALIDCALL;
6092         }
6093     }
6094
6095     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6096         location = SFLAG_INTEXTURE;
6097
6098     if (surface->flags & location)
6099     {
6100         TRACE("Location already up to date.\n");
6101         return WINED3D_OK;
6102     }
6103
6104     if (WARN_ON(d3d_surface))
6105     {
6106         DWORD required_access = resource_access_from_location(location);
6107         if ((surface->resource.access_flags & required_access) != required_access)
6108             WARN("Operation requires %#x access, but surface only has %#x.\n",
6109                     required_access, surface->resource.access_flags);
6110     }
6111
6112     if (!(surface->flags & SFLAG_LOCATIONS))
6113     {
6114         ERR("Surface %p does not have any up to date location.\n", surface);
6115         surface->flags |= SFLAG_LOST;
6116         return WINED3DERR_DEVICELOST;
6117     }
6118
6119     switch (location)
6120     {
6121         case SFLAG_INSYSMEM:
6122             surface_load_sysmem(surface, gl_info, rect);
6123             break;
6124
6125         case SFLAG_INDRAWABLE:
6126             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6127                 return hr;
6128             break;
6129
6130         case SFLAG_INRB_RESOLVED:
6131             surface_multisample_resolve(surface);
6132             break;
6133
6134         case SFLAG_INTEXTURE:
6135         case SFLAG_INSRGBTEX:
6136             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6137                 return hr;
6138             break;
6139
6140         default:
6141             ERR("Don't know how to handle location %#x.\n", location);
6142             break;
6143     }
6144
6145     if (!rect)
6146     {
6147         surface->flags |= location;
6148
6149         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6150             surface_evict_sysmem(surface);
6151     }
6152
6153     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6154             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6155     {
6156         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6157     }
6158
6159     return WINED3D_OK;
6160 }
6161
6162 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6163 {
6164     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6165
6166     /* Not on a swapchain - must be offscreen */
6167     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6168
6169     /* The front buffer is always onscreen */
6170     if (surface == swapchain->front_buffer) return FALSE;
6171
6172     /* If the swapchain is rendered to an FBO, the backbuffer is
6173      * offscreen, otherwise onscreen */
6174     return swapchain->render_to_fbo;
6175 }
6176
6177 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
    (void)device;
}
6180
6181 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6182 /* Context activation is done by the caller. */
6183 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6184 {
6185     BYTE table[256][4];
6186     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6187
6188     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6189
6190     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6191     ENTER_GL();
6192     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6193     LEAVE_GL();
6194 }
6195
6196 /* Context activation is done by the caller. */
6197 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6198 {
6199     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6200
6201     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6202      * else the surface is converted in software at upload time in LoadLocation.
6203      */
6204     if(fixup == COMPLEX_FIXUP_P8 && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6205         ffp_blit_p8_upload_palette(surface, context->gl_info);
6206
6207     ENTER_GL();
6208     glEnable(surface->texture_target);
6209     checkGLcall("glEnable(surface->texture_target)");
6210     LEAVE_GL();
6211     return WINED3D_OK;
6212 }
6213
6214 /* Context activation is done by the caller. */
6215 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6216 {
6217     ENTER_GL();
6218     glDisable(GL_TEXTURE_2D);
6219     checkGLcall("glDisable(GL_TEXTURE_2D)");
6220     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6221     {
6222         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6223         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6224     }
6225     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6226     {
6227         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6228         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6229     }
6230     LEAVE_GL();
6231 }
6232
6233 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6234         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6235         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6236 {
6237     enum complex_fixup src_fixup;
6238
6239     switch (blit_op)
6240     {
6241         case WINED3D_BLIT_OP_COLOR_BLIT:
6242             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6243                 return FALSE;
6244
6245             src_fixup = get_complex_fixup(src_format->color_fixup);
6246             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6247             {
6248                 TRACE("Checking support for fixup:\n");
6249                 dump_color_fixup_desc(src_format->color_fixup);
6250             }
6251
6252             if (!is_identity_fixup(dst_format->color_fixup))
6253             {
6254                 TRACE("Destination fixups are not supported\n");
6255                 return FALSE;
6256             }
6257
6258             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6259             {
6260                 TRACE("P8 fixup supported\n");
6261                 return TRUE;
6262             }
6263
6264             /* We only support identity conversions. */
6265             if (is_identity_fixup(src_format->color_fixup))
6266             {
6267                 TRACE("[OK]\n");
6268                 return TRUE;
6269             }
6270
6271             TRACE("[FAILED]\n");
6272             return FALSE;
6273
6274         case WINED3D_BLIT_OP_COLOR_FILL:
6275             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6276                 return FALSE;
6277
6278             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6279             {
6280                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6281                     return FALSE;
6282             }
6283             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6284             {
6285                 TRACE("Color fill not supported\n");
6286                 return FALSE;
6287             }
6288
6289             /* FIXME: We should reject color fills on formats with fixups,
6290              * but this would break P8 color fills for example. */
6291
6292             return TRUE;
6293
6294         case WINED3D_BLIT_OP_DEPTH_FILL:
6295             return TRUE;
6296
6297         default:
6298             TRACE("Unsupported blit_op=%d\n", blit_op);
6299             return FALSE;
6300     }
6301 }
6302
6303 /* Do not call while under the GL lock. */
6304 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6305         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6306 {
6307     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6308     struct wined3d_fb_state fb = {&dst_surface, NULL};
6309
6310     return device_clear_render_targets(device, 1, &fb,
6311             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6312 }
6313
6314 /* Do not call while under the GL lock. */
6315 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6316         struct wined3d_surface *surface, const RECT *rect, float depth)
6317 {
6318     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6319     struct wined3d_fb_state fb = {NULL, surface};
6320
6321     return device_clear_render_targets(device, 0, &fb,
6322             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6323 }
6324
6325 const struct blit_shader ffp_blit =  {
6326     ffp_blit_alloc,
6327     ffp_blit_free,
6328     ffp_blit_set,
6329     ffp_blit_unset,
6330     ffp_blit_supported,
6331     ffp_blit_color_fill,
6332     ffp_blit_depth_fill,
6333 };
6334
6335 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6336 {
6337     return WINED3D_OK;
6338 }
6339
/* Blitter teardown hook for the CPU blitter; intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6344
6345 /* Context activation is done by the caller. */
6346 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6347 {
6348     return WINED3D_OK;
6349 }
6350
/* Blit state teardown hook for the CPU blitter; intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6355
6356 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6357         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6358         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6359 {
6360     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6361     {
6362         return TRUE;
6363     }
6364
6365     return FALSE;
6366 }
6367
6368 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6369         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6370         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6371 {
6372     UINT row_block_count;
6373     const BYTE *src_row;
6374     BYTE *dst_row;
6375     UINT x, y;
6376
6377     src_row = src_data;
6378     dst_row = dst_data;
6379
6380     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6381
6382     if (!flags)
6383     {
6384         for (y = 0; y < update_h; y += format->block_height)
6385         {
6386             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6387             src_row += src_pitch;
6388             dst_row += dst_pitch;
6389         }
6390
6391         return WINED3D_OK;
6392     }
6393
6394     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6395     {
6396         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6397
6398         switch (format->id)
6399         {
6400             case WINED3DFMT_DXT1:
6401                 for (y = 0; y < update_h; y += format->block_height)
6402                 {
6403                     struct block
6404                     {
6405                         WORD color[2];
6406                         BYTE control_row[4];
6407                     };
6408
6409                     const struct block *s = (const struct block *)src_row;
6410                     struct block *d = (struct block *)dst_row;
6411
6412                     for (x = 0; x < row_block_count; ++x)
6413                     {
6414                         d[x].color[0] = s[x].color[0];
6415                         d[x].color[1] = s[x].color[1];
6416                         d[x].control_row[0] = s[x].control_row[3];
6417                         d[x].control_row[1] = s[x].control_row[2];
6418                         d[x].control_row[2] = s[x].control_row[1];
6419                         d[x].control_row[3] = s[x].control_row[0];
6420                     }
6421                     src_row -= src_pitch;
6422                     dst_row += dst_pitch;
6423                 }
6424                 return WINED3D_OK;
6425
6426             case WINED3DFMT_DXT3:
6427                 for (y = 0; y < update_h; y += format->block_height)
6428                 {
6429                     struct block
6430                     {
6431                         WORD alpha_row[4];
6432                         WORD color[2];
6433                         BYTE control_row[4];
6434                     };
6435
6436                     const struct block *s = (const struct block *)src_row;
6437                     struct block *d = (struct block *)dst_row;
6438
6439                     for (x = 0; x < row_block_count; ++x)
6440                     {
6441                         d[x].alpha_row[0] = s[x].alpha_row[3];
6442                         d[x].alpha_row[1] = s[x].alpha_row[2];
6443                         d[x].alpha_row[2] = s[x].alpha_row[1];
6444                         d[x].alpha_row[3] = s[x].alpha_row[0];
6445                         d[x].color[0] = s[x].color[0];
6446                         d[x].color[1] = s[x].color[1];
6447                         d[x].control_row[0] = s[x].control_row[3];
6448                         d[x].control_row[1] = s[x].control_row[2];
6449                         d[x].control_row[2] = s[x].control_row[1];
6450                         d[x].control_row[3] = s[x].control_row[0];
6451                     }
6452                     src_row -= src_pitch;
6453                     dst_row += dst_pitch;
6454                 }
6455                 return WINED3D_OK;
6456
6457             default:
6458                 FIXME("Compressed flip not implemented for format %s.\n",
6459                         debug_d3dformat(format->id));
6460                 return E_NOTIMPL;
6461         }
6462     }
6463
6464     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6465             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6466
6467     return E_NOTIMPL;
6468 }
6469
6470 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6471         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6472         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6473 {
6474     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6475     const struct wined3d_format *src_format, *dst_format;
6476     struct wined3d_surface *orig_src = src_surface;
6477     WINED3DLOCKED_RECT dlock, slock;
6478     HRESULT hr = WINED3D_OK;
6479     const BYTE *sbuf;
6480     RECT xdst,xsrc;
6481     BYTE *dbuf;
6482     int x, y;
6483
6484     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6485             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6486             flags, fx, debug_d3dtexturefiltertype(filter));
6487
6488     xsrc = *src_rect;
6489
6490     if (!src_surface)
6491     {
6492         RECT full_rect;
6493
6494         full_rect.left = 0;
6495         full_rect.top = 0;
6496         full_rect.right = dst_surface->resource.width;
6497         full_rect.bottom = dst_surface->resource.height;
6498         IntersectRect(&xdst, &full_rect, dst_rect);
6499     }
6500     else
6501     {
6502         BOOL clip_horiz, clip_vert;
6503
6504         xdst = *dst_rect;
6505         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6506         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6507
6508         if (clip_vert || clip_horiz)
6509         {
6510             /* Now check if this is a special case or not... */
6511             if ((flags & WINEDDBLT_DDFX)
6512                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6513                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6514             {
6515                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6516                 return WINED3D_OK;
6517             }
6518
6519             if (clip_horiz)
6520             {
6521                 if (xdst.left < 0)
6522                 {
6523                     xsrc.left -= xdst.left;
6524                     xdst.left = 0;
6525                 }
6526                 if (xdst.right > dst_surface->resource.width)
6527                 {
6528                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6529                     xdst.right = (int)dst_surface->resource.width;
6530                 }
6531             }
6532
6533             if (clip_vert)
6534             {
6535                 if (xdst.top < 0)
6536                 {
6537                     xsrc.top -= xdst.top;
6538                     xdst.top = 0;
6539                 }
6540                 if (xdst.bottom > dst_surface->resource.height)
6541                 {
6542                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6543                     xdst.bottom = (int)dst_surface->resource.height;
6544                 }
6545             }
6546
6547             /* And check if after clipping something is still to be done... */
6548             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6549                     || (xdst.left >= (int)dst_surface->resource.width)
6550                     || (xdst.top >= (int)dst_surface->resource.height)
6551                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6552                     || (xsrc.left >= (int)src_surface->resource.width)
6553                     || (xsrc.top >= (int)src_surface->resource.height))
6554             {
6555                 TRACE("Nothing to be done after clipping.\n");
6556                 return WINED3D_OK;
6557             }
6558         }
6559     }
6560
6561     if (src_surface == dst_surface)
6562     {
6563         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6564         slock = dlock;
6565         src_format = dst_surface->resource.format;
6566         dst_format = src_format;
6567     }
6568     else
6569     {
6570         dst_format = dst_surface->resource.format;
6571         if (src_surface)
6572         {
6573             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6574             {
6575                 src_surface = surface_convert_format(src_surface, dst_format->id);
6576                 if (!src_surface)
6577                 {
6578                     /* The conv function writes a FIXME */
6579                     WARN("Cannot convert source surface format to dest format.\n");
6580                     goto release;
6581                 }
6582             }
6583             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6584             src_format = src_surface->resource.format;
6585         }
6586         else
6587         {
6588             src_format = dst_format;
6589         }
6590         if (dst_rect)
6591             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6592         else
6593             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6594     }
6595
6596     bpp = dst_surface->resource.format->byte_count;
6597     srcheight = xsrc.bottom - xsrc.top;
6598     srcwidth = xsrc.right - xsrc.left;
6599     dstheight = xdst.bottom - xdst.top;
6600     dstwidth = xdst.right - xdst.left;
6601     width = (xdst.right - xdst.left) * bpp;
6602
6603     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6604     {
6605         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6606
6607         if (src_surface == dst_surface)
6608         {
6609             FIXME("Only plain blits supported on compressed surfaces.\n");
6610             hr = E_NOTIMPL;
6611             goto release;
6612         }
6613
6614         if (srcheight != dstheight || srcwidth != dstwidth)
6615         {
6616             WARN("Stretching not supported on compressed surfaces.\n");
6617             hr = WINED3DERR_INVALIDCALL;
6618             goto release;
6619         }
6620
6621         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6622         {
6623             WARN("Rectangle not block-aligned.\n");
6624             hr = WINED3DERR_INVALIDCALL;
6625             goto release;
6626         }
6627
6628         hr = surface_cpu_blt_compressed(slock.pBits, dlock.pBits,
6629                 slock.Pitch, dlock.Pitch, dstwidth, dstheight,
6630                 src_format, flags, fx);
6631         goto release;
6632     }
6633
6634     if (dst_rect && src_surface != dst_surface)
6635         dbuf = dlock.pBits;
6636     else
6637         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6638
6639     /* First, all the 'source-less' blits */
6640     if (flags & WINEDDBLT_COLORFILL)
6641     {
6642         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6643         flags &= ~WINEDDBLT_COLORFILL;
6644     }
6645
6646     if (flags & WINEDDBLT_DEPTHFILL)
6647     {
6648         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6649     }
6650     if (flags & WINEDDBLT_ROP)
6651     {
6652         /* Catch some degenerate cases here. */
6653         switch (fx->dwROP)
6654         {
6655             case BLACKNESS:
6656                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6657                 break;
6658             case 0xAA0029: /* No-op */
6659                 break;
6660             case WHITENESS:
6661                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6662                 break;
6663             case SRCCOPY: /* Well, we do that below? */
6664                 break;
6665             default:
6666                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6667                 goto error;
6668         }
6669         flags &= ~WINEDDBLT_ROP;
6670     }
6671     if (flags & WINEDDBLT_DDROPS)
6672     {
6673         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6674     }
6675     /* Now the 'with source' blits. */
6676     if (src_surface)
6677     {
6678         const BYTE *sbase;
6679         int sx, xinc, sy, yinc;
6680
6681         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6682             goto release;
6683
6684         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6685                 && (srcwidth != dstwidth || srcheight != dstheight))
6686         {
6687             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6688             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6689         }
6690
6691         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6692         xinc = (srcwidth << 16) / dstwidth;
6693         yinc = (srcheight << 16) / dstheight;
6694
6695         if (!flags)
6696         {
6697             /* No effects, we can cheat here. */
6698             if (dstwidth == srcwidth)
6699             {
6700                 if (dstheight == srcheight)
6701                 {
6702                     /* No stretching in either direction. This needs to be as
6703                      * fast as possible. */
6704                     sbuf = sbase;
6705
6706                     /* Check for overlapping surfaces. */
6707                     if (src_surface != dst_surface || xdst.top < xsrc.top
6708                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6709                     {
6710                         /* No overlap, or dst above src, so copy from top downwards. */
6711                         for (y = 0; y < dstheight; ++y)
6712                         {
6713                             memcpy(dbuf, sbuf, width);
6714                             sbuf += slock.Pitch;
6715                             dbuf += dlock.Pitch;
6716                         }
6717                     }
6718                     else if (xdst.top > xsrc.top)
6719                     {
6720                         /* Copy from bottom upwards. */
6721                         sbuf += (slock.Pitch*dstheight);
6722                         dbuf += (dlock.Pitch*dstheight);
6723                         for (y = 0; y < dstheight; ++y)
6724                         {
6725                             sbuf -= slock.Pitch;
6726                             dbuf -= dlock.Pitch;
6727                             memcpy(dbuf, sbuf, width);
6728                         }
6729                     }
6730                     else
6731                     {
6732                         /* Src and dst overlapping on the same line, use memmove. */
6733                         for (y = 0; y < dstheight; ++y)
6734                         {
6735                             memmove(dbuf, sbuf, width);
6736                             sbuf += slock.Pitch;
6737                             dbuf += dlock.Pitch;
6738                         }
6739                     }
6740                 }
6741                 else
6742                 {
6743                     /* Stretching in y direction only. */
6744                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6745                     {
6746                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6747                         memcpy(dbuf, sbuf, width);
6748                         dbuf += dlock.Pitch;
6749                     }
6750                 }
6751             }
6752             else
6753             {
6754                 /* Stretching in X direction. */
6755                 int last_sy = -1;
6756                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6757                 {
6758                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6759
6760                     if ((sy >> 16) == (last_sy >> 16))
6761                     {
6762                         /* This source row is the same as last source row -
6763                          * Copy the already stretched row. */
6764                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6765                     }
6766                     else
6767                     {
6768 #define STRETCH_ROW(type) \
6769 do { \
6770     const type *s = (const type *)sbuf; \
6771     type *d = (type *)dbuf; \
6772     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6773         d[x] = s[sx >> 16]; \
6774 } while(0)
6775
6776                         switch(bpp)
6777                         {
6778                             case 1:
6779                                 STRETCH_ROW(BYTE);
6780                                 break;
6781                             case 2:
6782                                 STRETCH_ROW(WORD);
6783                                 break;
6784                             case 4:
6785                                 STRETCH_ROW(DWORD);
6786                                 break;
6787                             case 3:
6788                             {
6789                                 const BYTE *s;
6790                                 BYTE *d = dbuf;
6791                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6792                                 {
6793                                     DWORD pixel;
6794
6795                                     s = sbuf + 3 * (sx >> 16);
6796                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6797                                     d[0] = (pixel      ) & 0xff;
6798                                     d[1] = (pixel >>  8) & 0xff;
6799                                     d[2] = (pixel >> 16) & 0xff;
6800                                     d += 3;
6801                                 }
6802                                 break;
6803                             }
6804                             default:
6805                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6806                                 hr = WINED3DERR_NOTAVAILABLE;
6807                                 goto error;
6808                         }
6809 #undef STRETCH_ROW
6810                     }
6811                     dbuf += dlock.Pitch;
6812                     last_sy = sy;
6813                 }
6814             }
6815         }
6816         else
6817         {
6818             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6819             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6820             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6821             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6822             {
6823                 /* The color keying flags are checked for correctness in ddraw */
6824                 if (flags & WINEDDBLT_KEYSRC)
6825                 {
6826                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6827                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6828                 }
6829                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6830                 {
6831                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6832                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6833                 }
6834
6835                 if (flags & WINEDDBLT_KEYDEST)
6836                 {
6837                     /* Destination color keys are taken from the source surface! */
6838                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6839                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6840                 }
6841                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6842                 {
6843                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6844                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6845                 }
6846
6847                 if (bpp == 1)
6848                 {
6849                     keymask = 0xff;
6850                 }
6851                 else
6852                 {
6853                     keymask = src_format->red_mask
6854                             | src_format->green_mask
6855                             | src_format->blue_mask;
6856                 }
6857                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6858             }
6859
6860             if (flags & WINEDDBLT_DDFX)
6861             {
6862                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6863                 LONG tmpxy;
6864                 dTopLeft     = dbuf;
6865                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6866                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6867                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6868
6869                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6870                 {
6871                     /* I don't think we need to do anything about this flag */
6872                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6873                 }
6874                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6875                 {
6876                     tmp          = dTopRight;
6877                     dTopRight    = dTopLeft;
6878                     dTopLeft     = tmp;
6879                     tmp          = dBottomRight;
6880                     dBottomRight = dBottomLeft;
6881                     dBottomLeft  = tmp;
6882                     dstxinc = dstxinc * -1;
6883                 }
6884                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6885                 {
6886                     tmp          = dTopLeft;
6887                     dTopLeft     = dBottomLeft;
6888                     dBottomLeft  = tmp;
6889                     tmp          = dTopRight;
6890                     dTopRight    = dBottomRight;
6891                     dBottomRight = tmp;
6892                     dstyinc = dstyinc * -1;
6893                 }
6894                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6895                 {
6896                     /* I don't think we need to do anything about this flag */
6897                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6898                 }
6899                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6900                 {
6901                     tmp          = dBottomRight;
6902                     dBottomRight = dTopLeft;
6903                     dTopLeft     = tmp;
6904                     tmp          = dBottomLeft;
6905                     dBottomLeft  = dTopRight;
6906                     dTopRight    = tmp;
6907                     dstxinc = dstxinc * -1;
6908                     dstyinc = dstyinc * -1;
6909                 }
6910                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6911                 {
6912                     tmp          = dTopLeft;
6913                     dTopLeft     = dBottomLeft;
6914                     dBottomLeft  = dBottomRight;
6915                     dBottomRight = dTopRight;
6916                     dTopRight    = tmp;
6917                     tmpxy   = dstxinc;
6918                     dstxinc = dstyinc;
6919                     dstyinc = tmpxy;
6920                     dstxinc = dstxinc * -1;
6921                 }
6922                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6923                 {
6924                     tmp          = dTopLeft;
6925                     dTopLeft     = dTopRight;
6926                     dTopRight    = dBottomRight;
6927                     dBottomRight = dBottomLeft;
6928                     dBottomLeft  = tmp;
6929                     tmpxy   = dstxinc;
6930                     dstxinc = dstyinc;
6931                     dstyinc = tmpxy;
6932                     dstyinc = dstyinc * -1;
6933                 }
6934                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6935                 {
6936                     /* I don't think we need to do anything about this flag */
6937                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6938                 }
6939                 dbuf = dTopLeft;
6940                 flags &= ~(WINEDDBLT_DDFX);
6941             }
6942
6943 #define COPY_COLORKEY_FX(type) \
6944 do { \
6945     const type *s; \
6946     type *d = (type *)dbuf, *dx, tmp; \
6947     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6948     { \
6949         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6950         dx = d; \
6951         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6952         { \
6953             tmp = s[sx >> 16]; \
6954             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6955                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6956             { \
6957                 dx[0] = tmp; \
6958             } \
6959             dx = (type *)(((BYTE *)dx) + dstxinc); \
6960         } \
6961         d = (type *)(((BYTE *)d) + dstyinc); \
6962     } \
6963 } while(0)
6964
6965             switch (bpp)
6966             {
6967                 case 1:
6968                     COPY_COLORKEY_FX(BYTE);
6969                     break;
6970                 case 2:
6971                     COPY_COLORKEY_FX(WORD);
6972                     break;
6973                 case 4:
6974                     COPY_COLORKEY_FX(DWORD);
6975                     break;
6976                 case 3:
6977                 {
6978                     const BYTE *s;
6979                     BYTE *d = dbuf, *dx;
6980                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6981                     {
6982                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6983                         dx = d;
6984                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
6985                         {
6986                             DWORD pixel, dpixel = 0;
6987                             s = sbuf + 3 * (sx>>16);
6988                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6989                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
6990                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
6991                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
6992                             {
6993                                 dx[0] = (pixel      ) & 0xff;
6994                                 dx[1] = (pixel >>  8) & 0xff;
6995                                 dx[2] = (pixel >> 16) & 0xff;
6996                             }
6997                             dx += dstxinc;
6998                         }
6999                         d += dstyinc;
7000                     }
7001                     break;
7002                 }
7003                 default:
7004                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7005                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7006                     hr = WINED3DERR_NOTAVAILABLE;
7007                     goto error;
7008 #undef COPY_COLORKEY_FX
7009             }
7010         }
7011     }
7012
7013 error:
7014     if (flags && FIXME_ON(d3d_surface))
7015     {
7016         FIXME("\tUnsupported flags: %#x.\n", flags);
7017     }
7018
7019 release:
7020     wined3d_surface_unmap(dst_surface);
7021     if (src_surface && src_surface != dst_surface)
7022         wined3d_surface_unmap(src_surface);
7023     /* Release the converted surface, if any. */
7024     if (src_surface && src_surface != orig_src)
7025         wined3d_surface_decref(src_surface);
7026
7027     return hr;
7028 }
7029
7030 /* Do not call while under the GL lock. */
7031 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7032         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7033 {
7034     static const RECT src_rect;
7035     WINEDDBLTFX BltFx;
7036
7037     memset(&BltFx, 0, sizeof(BltFx));
7038     BltFx.dwSize = sizeof(BltFx);
7039     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7040     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7041             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7042 }
7043
7044 /* Do not call while under the GL lock. */
7045 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7046         struct wined3d_surface *surface, const RECT *rect, float depth)
7047 {
7048     FIXME("Depth filling not implemented by cpu_blit.\n");
7049     return WINED3DERR_INVALIDCALL;
7050 }
7051
7052 const struct blit_shader cpu_blit =  {
7053     cpu_blit_alloc,
7054     cpu_blit_free,
7055     cpu_blit_set,
7056     cpu_blit_unset,
7057     cpu_blit_supported,
7058     cpu_blit_color_fill,
7059     cpu_blit_depth_fill,
7060 };
7061
7062 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7063         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7064         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7065         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7066 {
7067     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7068     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7069     unsigned int resource_size;
7070     HRESULT hr;
7071
7072     if (multisample_quality > 0)
7073     {
7074         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7075         multisample_quality = 0;
7076     }
7077
7078     /* Quick lockable sanity check.
7079      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7080      * this function is too deep to need to care about things like this.
7081      * Levels need to be checked too, since they all affect what can be done. */
7082     switch (pool)
7083     {
7084         case WINED3DPOOL_SCRATCH:
7085             if (!lockable)
7086             {
7087                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7088                         "which are mutually exclusive, setting lockable to TRUE.\n");
7089                 lockable = TRUE;
7090             }
7091             break;
7092
7093         case WINED3DPOOL_SYSTEMMEM:
7094             if (!lockable)
7095                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7096             break;
7097
7098         case WINED3DPOOL_MANAGED:
7099             if (usage & WINED3DUSAGE_DYNAMIC)
7100                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7101             break;
7102
7103         case WINED3DPOOL_DEFAULT:
7104             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7105                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7106             break;
7107
7108         default:
7109             FIXME("Unknown pool %#x.\n", pool);
7110             break;
7111     };
7112
7113     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7114         FIXME("Trying to create a render target that isn't in the default pool.\n");
7115
7116     /* FIXME: Check that the format is supported by the device. */
7117
7118     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7119     if (!resource_size)
7120         return WINED3DERR_INVALIDCALL;
7121
7122     surface->surface_type = surface_type;
7123
7124     switch (surface_type)
7125     {
7126         case SURFACE_OPENGL:
7127             surface->surface_ops = &surface_ops;
7128             break;
7129
7130         case SURFACE_GDI:
7131             surface->surface_ops = &gdi_surface_ops;
7132             break;
7133
7134         default:
7135             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7136             return WINED3DERR_INVALIDCALL;
7137     }
7138
7139     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7140             multisample_type, multisample_quality, usage, pool, width, height, 1,
7141             resource_size, parent, parent_ops, &surface_resource_ops);
7142     if (FAILED(hr))
7143     {
7144         WARN("Failed to initialize resource, returning %#x.\n", hr);
7145         return hr;
7146     }
7147
7148     /* "Standalone" surface. */
7149     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7150
7151     surface->texture_level = level;
7152     list_init(&surface->overlays);
7153
7154     /* Flags */
7155     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7156     if (discard)
7157         surface->flags |= SFLAG_DISCARD;
7158     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7159         surface->flags |= SFLAG_LOCKABLE;
7160     /* I'm not sure if this qualifies as a hack or as an optimization. It
7161      * seems reasonable to assume that lockable render targets will get
7162      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7163      * creation. However, the other reason we want to do this is that several
7164      * ddraw applications access surface memory while the surface isn't
7165      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7166      * future locks prevents these from crashing. */
7167     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7168         surface->flags |= SFLAG_DYNLOCK;
7169
7170     /* Mark the texture as dirty so that it gets loaded first time around. */
7171     surface_add_dirty_rect(surface, NULL);
7172     list_init(&surface->renderbuffers);
7173
7174     TRACE("surface %p, memory %p, size %u\n",
7175             surface, surface->resource.allocatedMemory, surface->resource.size);
7176
7177     /* Call the private setup routine */
7178     hr = surface->surface_ops->surface_private_setup(surface);
7179     if (FAILED(hr))
7180     {
7181         ERR("Private setup failed, returning %#x\n", hr);
7182         surface_cleanup(surface);
7183         return hr;
7184     }
7185
7186     return hr;
7187 }
7188
7189 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7190         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7191         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7192         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7193 {
7194     struct wined3d_surface *object;
7195     HRESULT hr;
7196
7197     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7198             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7199     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7200             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7201     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7202
7203     if (surface_type == SURFACE_OPENGL && !device->adapter)
7204     {
7205         ERR("OpenGL surfaces are not available without OpenGL.\n");
7206         return WINED3DERR_NOTAVAILABLE;
7207     }
7208
7209     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7210     if (!object)
7211     {
7212         ERR("Failed to allocate surface memory.\n");
7213         return WINED3DERR_OUTOFVIDEOMEMORY;
7214     }
7215
7216     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7217             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7218     if (FAILED(hr))
7219     {
7220         WARN("Failed to initialize surface, returning %#x.\n", hr);
7221         HeapFree(GetProcessHeap(), 0, object);
7222         return hr;
7223     }
7224
7225     TRACE("Created surface %p.\n", object);
7226     *surface = object;
7227
7228     return hr;
7229 }