mshtml: Added IOleContainer::EnumObjects tests.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         enum wined3d_texture_filter_type filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
164 struct blt_info
165 {
166     GLenum binding;
167     GLenum bind_target;
168     enum tex_types tex_type;
169     GLfloat coords[4][3];
170 };
171
/* Rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380     UINT usage;
381     HDC dc;
382
383     TRACE("surface %p.\n", surface);
384
385     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
386     {
387         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
388         return WINED3DERR_INVALIDCALL;
389     }
390
391     switch (format->byte_count)
392     {
393         case 2:
394         case 4:
395             /* Allocate extra space to store the RGB bit masks. */
396             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
397             break;
398
399         case 3:
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
401             break;
402
403         default:
404             /* Allocate extra space for a palette. */
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
406                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
407             break;
408     }
409
410     if (!b_info)
411         return E_OUTOFMEMORY;
412
413     /* Some applications access the surface in via DWORDs, and do not take
414      * the necessary care at the end of the surface. So we need at least
415      * 4 extra bytes at the end of the surface. Check against the page size,
416      * if the last page used for the surface has at least 4 spare bytes we're
417      * safe, otherwise add an extra line to the DIB section. */
418     GetSystemInfo(&sysInfo);
419     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
420     {
421         extraline = 1;
422         TRACE("Adding an extra line to the DIB section.\n");
423     }
424
425     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
426     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
427     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
428     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
429     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
430             * wined3d_surface_get_pitch(surface);
431     b_info->bmiHeader.biPlanes = 1;
432     b_info->bmiHeader.biBitCount = format->byte_count * 8;
433
434     b_info->bmiHeader.biXPelsPerMeter = 0;
435     b_info->bmiHeader.biYPelsPerMeter = 0;
436     b_info->bmiHeader.biClrUsed = 0;
437     b_info->bmiHeader.biClrImportant = 0;
438
439     /* Get the bit masks */
440     masks = (DWORD *)b_info->bmiColors;
441     switch (surface->resource.format->id)
442     {
443         case WINED3DFMT_B8G8R8_UNORM:
444             usage = DIB_RGB_COLORS;
445             b_info->bmiHeader.biCompression = BI_RGB;
446             break;
447
448         case WINED3DFMT_B5G5R5X1_UNORM:
449         case WINED3DFMT_B5G5R5A1_UNORM:
450         case WINED3DFMT_B4G4R4A4_UNORM:
451         case WINED3DFMT_B4G4R4X4_UNORM:
452         case WINED3DFMT_B2G3R3_UNORM:
453         case WINED3DFMT_B2G3R3A8_UNORM:
454         case WINED3DFMT_R10G10B10A2_UNORM:
455         case WINED3DFMT_R8G8B8A8_UNORM:
456         case WINED3DFMT_R8G8B8X8_UNORM:
457         case WINED3DFMT_B10G10R10A2_UNORM:
458         case WINED3DFMT_B5G6R5_UNORM:
459         case WINED3DFMT_R16G16B16A16_UNORM:
460             usage = 0;
461             b_info->bmiHeader.biCompression = BI_BITFIELDS;
462             masks[0] = format->red_mask;
463             masks[1] = format->green_mask;
464             masks[2] = format->blue_mask;
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             usage = 0;
471             break;
472     }
473
474     if (!(dc = GetDC(0)))
475     {
476         HeapFree(GetProcessHeap(), 0, b_info);
477         return HRESULT_FROM_WIN32(GetLastError());
478     }
479
480     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
481             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
482             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
483     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
484     ReleaseDC(0, dc);
485
486     if (!surface->dib.DIBsection)
487     {
488         ERR("Failed to create DIB section.\n");
489         HeapFree(GetProcessHeap(), 0, b_info);
490         return HRESULT_FROM_WIN32(GetLastError());
491     }
492
493     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
494     /* Copy the existing surface to the dib section. */
495     if (surface->resource.allocatedMemory)
496     {
497         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
498                 surface->resource.height * wined3d_surface_get_pitch(surface));
499     }
500     else
501     {
502         /* This is to make maps read the GL texture although memory is allocated. */
503         surface->flags &= ~SFLAG_INSYSMEM;
504     }
505     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
506
507     HeapFree(GetProcessHeap(), 0, b_info);
508
509     /* Now allocate a DC. */
510     surface->hDC = CreateCompatibleDC(0);
511     SelectObject(surface->hDC, surface->dib.DIBsection);
512     TRACE("Using wined3d palette %p.\n", surface->palette);
513     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
514
515     surface->flags |= SFLAG_DIBSECTION;
516
517     return WINED3D_OK;
518 }
519
520 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
521 {
522     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
523         return FALSE;
524     if (!(surface->flags & SFLAG_DYNLOCK))
525         return FALSE;
526     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
527         return FALSE;
528     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
529         return FALSE;
530
531     return TRUE;
532 }
533
534 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
535 {
536     struct wined3d_context *context;
537     GLenum error;
538
539     context = context_acquire(surface->resource.device, NULL);
540     ENTER_GL();
541
542     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
543     error = glGetError();
544     if (!surface->pbo || error != GL_NO_ERROR)
545         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
546
547     TRACE("Binding PBO %u.\n", surface->pbo);
548
549     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
550     checkGLcall("glBindBufferARB");
551
552     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
553             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
554     checkGLcall("glBufferDataARB");
555
556     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
557     checkGLcall("glBindBufferARB");
558
559     /* We don't need the system memory anymore and we can't even use it for PBOs. */
560     if (!(surface->flags & SFLAG_CLIENT))
561     {
562         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
563         surface->resource.heapMemory = NULL;
564     }
565     surface->resource.allocatedMemory = NULL;
566     surface->flags |= SFLAG_PBO;
567     LEAVE_GL();
568     context_release(context);
569 }
570
571 static void surface_prepare_system_memory(struct wined3d_surface *surface)
572 {
573     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
574
575     TRACE("surface %p.\n", surface);
576
577     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
578         surface_load_pbo(surface, gl_info);
579     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
580     {
581         /* Whatever surface we have, make sure that there is memory allocated
582          * for the downloaded copy, or a PBO to map. */
583         if (!surface->resource.heapMemory)
584             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
585
586         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
587                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
588
589         if (surface->flags & SFLAG_INSYSMEM)
590             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
591     }
592 }
593
594 static void surface_evict_sysmem(struct wined3d_surface *surface)
595 {
596     if (surface->flags & SFLAG_DONOTFREE)
597         return;
598
599     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
600     surface->resource.allocatedMemory = NULL;
601     surface->resource.heapMemory = NULL;
602     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
603 }
604
605 /* Context activation is done by the caller. */
606 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
607         struct wined3d_context *context, BOOL srgb)
608 {
609     struct wined3d_device *device = surface->resource.device;
610     DWORD active_sampler;
611
612     /* We don't need a specific texture unit, but after binding the texture
613      * the current unit is dirty. Read the unit back instead of switching to
614      * 0, this avoids messing around with the state manager's GL states. The
615      * current texture unit should always be a valid one.
616      *
617      * To be more specific, this is tricky because we can implicitly be
618      * called from sampler() in state.c. This means we can't touch anything
619      * other than whatever happens to be the currently active texture, or we
620      * would risk marking already applied sampler states dirty again. */
621     active_sampler = device->rev_tex_unit_map[context->active_texture];
622
623     if (active_sampler != WINED3D_UNMAPPED_STAGE)
624         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
625     surface_bind(surface, context, srgb);
626 }
627
628 static void surface_force_reload(struct wined3d_surface *surface)
629 {
630     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
631 }
632
633 static void surface_release_client_storage(struct wined3d_surface *surface)
634 {
635     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
636
637     ENTER_GL();
638     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
639     if (surface->texture_name)
640     {
641         surface_bind_and_dirtify(surface, context, FALSE);
642         glTexImage2D(surface->texture_target, surface->texture_level,
643                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
644     }
645     if (surface->texture_name_srgb)
646     {
647         surface_bind_and_dirtify(surface, context, TRUE);
648         glTexImage2D(surface->texture_target, surface->texture_level,
649                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
650     }
651     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
652     LEAVE_GL();
653
654     context_release(context);
655
656     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
657     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
658     surface_force_reload(surface);
659 }
660
661 static HRESULT surface_private_setup(struct wined3d_surface *surface)
662 {
663     /* TODO: Check against the maximum texture sizes supported by the video card. */
664     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
665     unsigned int pow2Width, pow2Height;
666
667     TRACE("surface %p.\n", surface);
668
669     surface->texture_name = 0;
670     surface->texture_target = GL_TEXTURE_2D;
671
672     /* Non-power2 support */
673     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
674     {
675         pow2Width = surface->resource.width;
676         pow2Height = surface->resource.height;
677     }
678     else
679     {
680         /* Find the nearest pow2 match */
681         pow2Width = pow2Height = 1;
682         while (pow2Width < surface->resource.width)
683             pow2Width <<= 1;
684         while (pow2Height < surface->resource.height)
685             pow2Height <<= 1;
686     }
687     surface->pow2Width = pow2Width;
688     surface->pow2Height = pow2Height;
689
690     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
691     {
692         /* TODO: Add support for non power two compressed textures. */
693         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
694         {
695             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
696                   surface, surface->resource.width, surface->resource.height);
697             return WINED3DERR_NOTAVAILABLE;
698         }
699     }
700
701     if (pow2Width != surface->resource.width
702             || pow2Height != surface->resource.height)
703     {
704         surface->flags |= SFLAG_NONPOW2;
705     }
706
707     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
708             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
709     {
710         /* One of three options:
711          * 1: Do the same as we do with NPOT and scale the texture, (any
712          *    texture ops would require the texture to be scaled which is
713          *    potentially slow)
714          * 2: Set the texture to the maximum size (bad idea).
715          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
716          * 4: Create the surface, but allow it to be used only for DirectDraw
717          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
718          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
719          *    the render target. */
720         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
721         {
722             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
723             return WINED3DERR_NOTAVAILABLE;
724         }
725
726         /* We should never use this surface in combination with OpenGL! */
727         TRACE("Creating an oversized surface: %ux%u.\n",
728                 surface->pow2Width, surface->pow2Height);
729     }
730     else
731     {
732         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
733          * and EXT_PALETTED_TEXTURE is used in combination with texture
734          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
735          * EXT_PALETTED_TEXTURE doesn't work in combination with
736          * ARB_TEXTURE_RECTANGLE. */
737         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
738                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
739                 && gl_info->supported[EXT_PALETTED_TEXTURE]
740                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
741         {
742             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
743             surface->pow2Width = surface->resource.width;
744             surface->pow2Height = surface->resource.height;
745             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
746         }
747     }
748
749     switch (wined3d_settings.offscreen_rendering_mode)
750     {
751         case ORM_FBO:
752             surface->get_drawable_size = get_drawable_size_fbo;
753             break;
754
755         case ORM_BACKBUFFER:
756             surface->get_drawable_size = get_drawable_size_backbuffer;
757             break;
758
759         default:
760             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
761             return WINED3DERR_INVALIDCALL;
762     }
763
764     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
765         surface->flags |= SFLAG_LOST;
766
767     return WINED3D_OK;
768 }
769
770 static void surface_realize_palette(struct wined3d_surface *surface)
771 {
772     struct wined3d_palette *palette = surface->palette;
773
774     TRACE("surface %p.\n", surface);
775
776     if (!palette) return;
777
778     if (surface->resource.format->id == WINED3DFMT_P8_UINT
779             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
780     {
781         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
782         {
783             /* Make sure the texture is up to date. This call doesn't do
784              * anything if the texture is already up to date. */
785             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
786
787             /* We want to force a palette refresh, so mark the drawable as not being up to date */
788             if (!surface_is_offscreen(surface))
789                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
790         }
791         else
792         {
793             if (!(surface->flags & SFLAG_INSYSMEM))
794             {
795                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
796                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
797             }
798             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
799         }
800     }
801
802     if (surface->flags & SFLAG_DIBSECTION)
803     {
804         RGBQUAD col[256];
805         unsigned int i;
806
807         TRACE("Updating the DC's palette.\n");
808
809         for (i = 0; i < 256; ++i)
810         {
811             col[i].rgbRed   = palette->palents[i].peRed;
812             col[i].rgbGreen = palette->palents[i].peGreen;
813             col[i].rgbBlue  = palette->palents[i].peBlue;
814             col[i].rgbReserved = 0;
815         }
816         SetDIBColorTable(surface->hDC, 0, 256, col);
817     }
818
819     /* Propagate the changes to the drawable when we have a palette. */
820     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
821         surface_load_location(surface, surface->draw_binding, NULL);
822 }
823
824 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
825 {
826     HRESULT hr;
827
828     /* If there's no destination surface there is nothing to do. */
829     if (!surface->overlay_dest)
830         return WINED3D_OK;
831
832     /* Blt calls ModifyLocation on the dest surface, which in turn calls
833      * DrawOverlay to update the overlay. Prevent an endless recursion. */
834     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
835         return WINED3D_OK;
836
837     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
838     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
839             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
840     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
841
842     return hr;
843 }
844
845 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
846 {
847     struct wined3d_device *device = surface->resource.device;
848     const RECT *pass_rect = rect;
849
850     TRACE("surface %p, rect %s, flags %#x.\n",
851             surface, wine_dbgstr_rect(rect), flags);
852
853     if (flags & WINED3DLOCK_DISCARD)
854     {
855         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
856         surface_prepare_system_memory(surface);
857         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
858     }
859     else
860     {
861         /* surface_load_location() does not check if the rectangle specifies
862          * the full surface. Most callers don't need that, so do it here. */
863         if (rect && !rect->top && !rect->left
864                 && rect->right == surface->resource.width
865                 && rect->bottom == surface->resource.height)
866             pass_rect = NULL;
867         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
868     }
869
870     if (surface->flags & SFLAG_PBO)
871     {
872         const struct wined3d_gl_info *gl_info;
873         struct wined3d_context *context;
874
875         context = context_acquire(device, NULL);
876         gl_info = context->gl_info;
877
878         ENTER_GL();
879         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
880         checkGLcall("glBindBufferARB");
881
882         /* This shouldn't happen but could occur if some other function
883          * didn't handle the PBO properly. */
884         if (surface->resource.allocatedMemory)
885             ERR("The surface already has PBO memory allocated.\n");
886
887         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
888         checkGLcall("glMapBufferARB");
889
890         /* Make sure the PBO isn't set anymore in order not to break non-PBO
891          * calls. */
892         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
893         checkGLcall("glBindBufferARB");
894
895         LEAVE_GL();
896         context_release(context);
897     }
898
899     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
900     {
901         if (!rect)
902             surface_add_dirty_rect(surface, NULL);
903         else
904         {
905             struct wined3d_box b;
906
907             b.left = rect->left;
908             b.top = rect->top;
909             b.right = rect->right;
910             b.bottom = rect->bottom;
911             b.front = 0;
912             b.back = 1;
913             surface_add_dirty_rect(surface, &b);
914         }
915     }
916 }
917
918 static void surface_unmap(struct wined3d_surface *surface)
919 {
920     struct wined3d_device *device = surface->resource.device;
921     BOOL fullsurface;
922
923     TRACE("surface %p.\n", surface);
924
925     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
926
927     if (surface->flags & SFLAG_PBO)
928     {
929         const struct wined3d_gl_info *gl_info;
930         struct wined3d_context *context;
931
932         TRACE("Freeing PBO memory.\n");
933
934         context = context_acquire(device, NULL);
935         gl_info = context->gl_info;
936
937         ENTER_GL();
938         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
939         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
940         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
941         checkGLcall("glUnmapBufferARB");
942         LEAVE_GL();
943         context_release(context);
944
945         surface->resource.allocatedMemory = NULL;
946     }
947
948     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
949
950     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
951     {
952         TRACE("Not dirtified, nothing to do.\n");
953         goto done;
954     }
955
956     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
957             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
958     {
959         if (!surface->dirtyRect.left && !surface->dirtyRect.top
960                 && surface->dirtyRect.right == surface->resource.width
961                 && surface->dirtyRect.bottom == surface->resource.height)
962         {
963             fullsurface = TRUE;
964         }
965         else
966         {
967             /* TODO: Proper partial rectangle tracking. */
968             fullsurface = FALSE;
969             surface->flags |= SFLAG_INSYSMEM;
970         }
971
972         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
973
974         /* Partial rectangle tracking is not commonly implemented, it is only
975          * done for render targets. INSYSMEM was set before to tell
976          * surface_load_location() where to read the rectangle from.
977          * Indrawable is set because all modifications from the partial
978          * sysmem copy are written back to the drawable, thus the surface is
979          * merged again in the drawable. The sysmem copy is not fully up to
980          * date because only a subrectangle was read in Map(). */
981         if (!fullsurface)
982         {
983             surface_modify_location(surface, surface->draw_binding, TRUE);
984             surface_evict_sysmem(surface);
985         }
986
987         surface->dirtyRect.left = surface->resource.width;
988         surface->dirtyRect.top = surface->resource.height;
989         surface->dirtyRect.right = 0;
990         surface->dirtyRect.bottom = 0;
991     }
992     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
993     {
994         FIXME("Depth / stencil buffer locking is not implemented.\n");
995     }
996
997 done:
998     /* Overlays have to be redrawn manually after changes with the GL implementation */
999     if (surface->overlay_dest)
1000         surface_draw_overlay(surface);
1001 }
1002
1003 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1004 {
1005     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1006         return FALSE;
1007     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1008         return FALSE;
1009     return TRUE;
1010 }
1011
1012 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1013         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1014 {
1015     const struct wined3d_gl_info *gl_info;
1016     struct wined3d_context *context;
1017     DWORD src_mask, dst_mask;
1018     GLbitfield gl_mask;
1019
1020     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1021             device, src_surface, wine_dbgstr_rect(src_rect),
1022             dst_surface, wine_dbgstr_rect(dst_rect));
1023
1024     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1025     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1026
1027     if (src_mask != dst_mask)
1028     {
1029         ERR("Incompatible formats %s and %s.\n",
1030                 debug_d3dformat(src_surface->resource.format->id),
1031                 debug_d3dformat(dst_surface->resource.format->id));
1032         return;
1033     }
1034
1035     if (!src_mask)
1036     {
1037         ERR("Not a depth / stencil format: %s.\n",
1038                 debug_d3dformat(src_surface->resource.format->id));
1039         return;
1040     }
1041
1042     gl_mask = 0;
1043     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1044         gl_mask |= GL_DEPTH_BUFFER_BIT;
1045     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1046         gl_mask |= GL_STENCIL_BUFFER_BIT;
1047
1048     /* Make sure the locations are up-to-date. Loading the destination
1049      * surface isn't required if the entire surface is overwritten. */
1050     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1051     if (!surface_is_full_rect(dst_surface, dst_rect))
1052         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1053
1054     context = context_acquire(device, NULL);
1055     if (!context->valid)
1056     {
1057         context_release(context);
1058         WARN("Invalid context, skipping blit.\n");
1059         return;
1060     }
1061
1062     gl_info = context->gl_info;
1063
1064     ENTER_GL();
1065
1066     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1067     glReadBuffer(GL_NONE);
1068     checkGLcall("glReadBuffer()");
1069     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1070
1071     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1072     context_set_draw_buffer(context, GL_NONE);
1073     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1074     context_invalidate_state(context, STATE_FRAMEBUFFER);
1075
1076     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1077     {
1078         glDepthMask(GL_TRUE);
1079         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1080     }
1081     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1082     {
1083         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1084         {
1085             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1086             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1087         }
1088         glStencilMask(~0U);
1089         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1090     }
1091
1092     glDisable(GL_SCISSOR_TEST);
1093     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1094
1095     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1096             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1097     checkGLcall("glBlitFramebuffer()");
1098
1099     LEAVE_GL();
1100
1101     if (wined3d_settings.strict_draw_ordering)
1102         wglFlush(); /* Flush to ensure ordering across contexts. */
1103
1104     context_release(context);
1105 }
1106
1107 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1108  * Depth / stencil is not supported. */
1109 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1110         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1111         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1112 {
1113     const struct wined3d_gl_info *gl_info;
1114     struct wined3d_context *context;
1115     RECT src_rect, dst_rect;
1116     GLenum gl_filter;
1117     GLenum buffer;
1118
1119     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1120     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1121             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1122     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1123             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1124
1125     src_rect = *src_rect_in;
1126     dst_rect = *dst_rect_in;
1127
1128     switch (filter)
1129     {
1130         case WINED3D_TEXF_LINEAR:
1131             gl_filter = GL_LINEAR;
1132             break;
1133
1134         default:
1135             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1136         case WINED3D_TEXF_NONE:
1137         case WINED3D_TEXF_POINT:
1138             gl_filter = GL_NEAREST;
1139             break;
1140     }
1141
1142     /* Resolve the source surface first if needed. */
1143     if (src_location == SFLAG_INRB_MULTISAMPLE
1144             && (src_surface->resource.format->id != dst_surface->resource.format->id
1145                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1146                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1147         src_location = SFLAG_INRB_RESOLVED;
1148
1149     /* Make sure the locations are up-to-date. Loading the destination
1150      * surface isn't required if the entire surface is overwritten. (And is
1151      * in fact harmful if we're being called by surface_load_location() with
1152      * the purpose of loading the destination surface.) */
1153     surface_load_location(src_surface, src_location, NULL);
1154     if (!surface_is_full_rect(dst_surface, &dst_rect))
1155         surface_load_location(dst_surface, dst_location, NULL);
1156
1157     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1158     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1159     else context = context_acquire(device, NULL);
1160
1161     if (!context->valid)
1162     {
1163         context_release(context);
1164         WARN("Invalid context, skipping blit.\n");
1165         return;
1166     }
1167
1168     gl_info = context->gl_info;
1169
1170     if (src_location == SFLAG_INDRAWABLE)
1171     {
1172         TRACE("Source surface %p is onscreen.\n", src_surface);
1173         buffer = surface_get_gl_buffer(src_surface);
1174         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1175     }
1176     else
1177     {
1178         TRACE("Source surface %p is offscreen.\n", src_surface);
1179         buffer = GL_COLOR_ATTACHMENT0;
1180     }
1181
1182     ENTER_GL();
1183     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1184     glReadBuffer(buffer);
1185     checkGLcall("glReadBuffer()");
1186     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1187     LEAVE_GL();
1188
1189     if (dst_location == SFLAG_INDRAWABLE)
1190     {
1191         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1192         buffer = surface_get_gl_buffer(dst_surface);
1193         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1194     }
1195     else
1196     {
1197         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1198         buffer = GL_COLOR_ATTACHMENT0;
1199     }
1200
1201     ENTER_GL();
1202     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1203     context_set_draw_buffer(context, buffer);
1204     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1205     context_invalidate_state(context, STATE_FRAMEBUFFER);
1206
1207     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1208     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1209     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1210     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1211     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1212
1213     glDisable(GL_SCISSOR_TEST);
1214     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1215
1216     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1217             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1218     checkGLcall("glBlitFramebuffer()");
1219
1220     LEAVE_GL();
1221
1222     if (wined3d_settings.strict_draw_ordering
1223             || (dst_location == SFLAG_INDRAWABLE
1224             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1225         wglFlush();
1226
1227     context_release(context);
1228 }
1229
1230 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1231         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1232         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1233 {
1234     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1235         return FALSE;
1236
1237     /* Source and/or destination need to be on the GL side */
1238     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1239         return FALSE;
1240
1241     switch (blit_op)
1242     {
1243         case WINED3D_BLIT_OP_COLOR_BLIT:
1244             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1245                 return FALSE;
1246             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1247                 return FALSE;
1248             break;
1249
1250         case WINED3D_BLIT_OP_DEPTH_BLIT:
1251             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1252                 return FALSE;
1253             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1254                 return FALSE;
1255             break;
1256
1257         default:
1258             return FALSE;
1259     }
1260
1261     if (!(src_format->id == dst_format->id
1262             || (is_identity_fixup(src_format->color_fixup)
1263             && is_identity_fixup(dst_format->color_fixup))))
1264         return FALSE;
1265
1266     return TRUE;
1267 }
1268
1269 /* This function checks if the primary render target uses the 8bit paletted format. */
1270 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1271 {
1272     if (device->fb.render_targets && device->fb.render_targets[0])
1273     {
1274         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1275         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1276                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1277             return TRUE;
1278     }
1279     return FALSE;
1280 }
1281
1282 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1283         DWORD color, struct wined3d_color *float_color)
1284 {
1285     const struct wined3d_format *format = surface->resource.format;
1286     const struct wined3d_device *device = surface->resource.device;
1287
1288     switch (format->id)
1289     {
1290         case WINED3DFMT_P8_UINT:
1291             if (surface->palette)
1292             {
1293                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1294                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1295                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1296             }
1297             else
1298             {
1299                 float_color->r = 0.0f;
1300                 float_color->g = 0.0f;
1301                 float_color->b = 0.0f;
1302             }
1303             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1304             break;
1305
1306         case WINED3DFMT_B5G6R5_UNORM:
1307             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1308             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1309             float_color->b = (color & 0x1f) / 31.0f;
1310             float_color->a = 1.0f;
1311             break;
1312
1313         case WINED3DFMT_B8G8R8_UNORM:
1314         case WINED3DFMT_B8G8R8X8_UNORM:
1315             float_color->r = D3DCOLOR_R(color);
1316             float_color->g = D3DCOLOR_G(color);
1317             float_color->b = D3DCOLOR_B(color);
1318             float_color->a = 1.0f;
1319             break;
1320
1321         case WINED3DFMT_B8G8R8A8_UNORM:
1322             float_color->r = D3DCOLOR_R(color);
1323             float_color->g = D3DCOLOR_G(color);
1324             float_color->b = D3DCOLOR_B(color);
1325             float_color->a = D3DCOLOR_A(color);
1326             break;
1327
1328         default:
1329             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1330             return FALSE;
1331     }
1332
1333     return TRUE;
1334 }
1335
1336 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1337 {
1338     const struct wined3d_format *format = surface->resource.format;
1339
1340     switch (format->id)
1341     {
1342         case WINED3DFMT_S1_UINT_D15_UNORM:
1343             *float_depth = depth / (float)0x00007fff;
1344             break;
1345
1346         case WINED3DFMT_D16_UNORM:
1347             *float_depth = depth / (float)0x0000ffff;
1348             break;
1349
1350         case WINED3DFMT_D24_UNORM_S8_UINT:
1351         case WINED3DFMT_X8D24_UNORM:
1352             *float_depth = depth / (float)0x00ffffff;
1353             break;
1354
1355         case WINED3DFMT_D32_UNORM:
1356             *float_depth = depth / (float)0xffffffff;
1357             break;
1358
1359         default:
1360             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1361             return FALSE;
1362     }
1363
1364     return TRUE;
1365 }
1366
1367 /* Do not call while under the GL lock. */
1368 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1369 {
1370     const struct wined3d_resource *resource = &surface->resource;
1371     struct wined3d_device *device = resource->device;
1372     const struct blit_shader *blitter;
1373
1374     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1375             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1376     if (!blitter)
1377     {
1378         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1379         return WINED3DERR_INVALIDCALL;
1380     }
1381
1382     return blitter->depth_fill(device, surface, rect, depth);
1383 }
1384
1385 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1386         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1387 {
1388     struct wined3d_device *device = src_surface->resource.device;
1389
1390     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1391             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1392             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1393         return WINED3DERR_INVALIDCALL;
1394
1395     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1396
1397     surface_modify_ds_location(dst_surface, SFLAG_INTEXTURE,
1398             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1399
1400     return WINED3D_OK;
1401 }
1402
1403 /* Do not call while under the GL lock. */
1404 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1405         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1406         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1407 {
1408     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1409     struct wined3d_device *device = dst_surface->resource.device;
1410     DWORD src_ds_flags, dst_ds_flags;
1411     RECT src_rect, dst_rect;
1412     BOOL scale, convert;
1413
1414     static const DWORD simple_blit = WINEDDBLT_ASYNC
1415             | WINEDDBLT_COLORFILL
1416             | WINEDDBLT_WAIT
1417             | WINEDDBLT_DEPTHFILL
1418             | WINEDDBLT_DONOTWAIT;
1419
1420     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1421             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1422             flags, fx, debug_d3dtexturefiltertype(filter));
1423     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1424
1425     if (fx)
1426     {
1427         TRACE("dwSize %#x.\n", fx->dwSize);
1428         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1429         TRACE("dwROP %#x.\n", fx->dwROP);
1430         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1431         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1432         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1433         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1434         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1435         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1436         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1437         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1438         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1439         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1440         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1441         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1442         TRACE("dwReserved %#x.\n", fx->dwReserved);
1443         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1444         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1445         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1446         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1447         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1448         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1449                 fx->ddckDestColorkey.color_space_low_value,
1450                 fx->ddckDestColorkey.color_space_high_value);
1451         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1452                 fx->ddckSrcColorkey.color_space_low_value,
1453                 fx->ddckSrcColorkey.color_space_high_value);
1454     }
1455
1456     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1457     {
1458         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1459         return WINEDDERR_SURFACEBUSY;
1460     }
1461
1462     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1463
1464     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1465             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1466             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1467             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1468             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1469     {
1470         WARN("The application gave us a bad destination rectangle.\n");
1471         return WINEDDERR_INVALIDRECT;
1472     }
1473
1474     if (src_surface)
1475     {
1476         surface_get_rect(src_surface, src_rect_in, &src_rect);
1477
1478         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1479                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1480                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1481                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1482                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1483         {
1484             WARN("Application gave us bad source rectangle for Blt.\n");
1485             return WINEDDERR_INVALIDRECT;
1486         }
1487     }
1488     else
1489     {
1490         memset(&src_rect, 0, sizeof(src_rect));
1491     }
1492
1493     if (!fx || !(fx->dwDDFX))
1494         flags &= ~WINEDDBLT_DDFX;
1495
1496     if (flags & WINEDDBLT_WAIT)
1497         flags &= ~WINEDDBLT_WAIT;
1498
1499     if (flags & WINEDDBLT_ASYNC)
1500     {
1501         static unsigned int once;
1502
1503         if (!once++)
1504             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1505         flags &= ~WINEDDBLT_ASYNC;
1506     }
1507
1508     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1509     if (flags & WINEDDBLT_DONOTWAIT)
1510     {
1511         static unsigned int once;
1512
1513         if (!once++)
1514             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1515         flags &= ~WINEDDBLT_DONOTWAIT;
1516     }
1517
1518     if (!device->d3d_initialized)
1519     {
1520         WARN("D3D not initialized, using fallback.\n");
1521         goto cpu;
1522     }
1523
1524     /* We want to avoid invalidating the sysmem location for converted
1525      * surfaces, since otherwise we'd have to convert the data back when
1526      * locking them. */
1527     if (dst_surface->flags & SFLAG_CONVERTED)
1528     {
1529         WARN("Converted surface, using CPU blit.\n");
1530         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1531     }
1532
1533     if (flags & ~simple_blit)
1534     {
1535         WARN("Using fallback for complex blit (%#x).\n", flags);
1536         goto fallback;
1537     }
1538
1539     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1540         src_swapchain = src_surface->container.u.swapchain;
1541     else
1542         src_swapchain = NULL;
1543
1544     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1545         dst_swapchain = dst_surface->container.u.swapchain;
1546     else
1547         dst_swapchain = NULL;
1548
1549     /* This isn't strictly needed. FBO blits for example could deal with
1550      * cross-swapchain blits by first downloading the source to a texture
1551      * before switching to the destination context. We just have this here to
1552      * not have to deal with the issue, since cross-swapchain blits should be
1553      * rare. */
1554     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1555     {
1556         FIXME("Using fallback for cross-swapchain blit.\n");
1557         goto fallback;
1558     }
1559
1560     scale = src_surface
1561             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1562             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1563     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1564
1565     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1566     if (src_surface)
1567         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1568     else
1569         src_ds_flags = 0;
1570
1571     if (src_ds_flags || dst_ds_flags)
1572     {
1573         if (flags & WINEDDBLT_DEPTHFILL)
1574         {
1575             float depth;
1576
1577             TRACE("Depth fill.\n");
1578
1579             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1580                 return WINED3DERR_INVALIDCALL;
1581
1582             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1583                 return WINED3D_OK;
1584         }
1585         else
1586         {
1587             /* Accessing depth / stencil surfaces is supposed to fail while in
1588              * a scene, except for fills, which seem to work. */
1589             if (device->inScene)
1590             {
1591                 WARN("Rejecting depth / stencil access while in scene.\n");
1592                 return WINED3DERR_INVALIDCALL;
1593             }
1594
1595             if (src_ds_flags != dst_ds_flags)
1596             {
1597                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1598                 return WINED3DERR_INVALIDCALL;
1599             }
1600
1601             if (src_rect.top || src_rect.left
1602                     || src_rect.bottom != src_surface->resource.height
1603                     || src_rect.right != src_surface->resource.width)
1604             {
1605                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1606                         wine_dbgstr_rect(&src_rect));
1607                 return WINED3DERR_INVALIDCALL;
1608             }
1609
1610             if (dst_rect.top || dst_rect.left
1611                     || dst_rect.bottom != dst_surface->resource.height
1612                     || dst_rect.right != dst_surface->resource.width)
1613             {
1614                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1615                         wine_dbgstr_rect(&src_rect));
1616                 return WINED3DERR_INVALIDCALL;
1617             }
1618
1619             if (scale)
1620             {
1621                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1622                 return WINED3DERR_INVALIDCALL;
1623             }
1624
1625             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1626                 return WINED3D_OK;
1627         }
1628     }
1629     else
1630     {
1631         /* In principle this would apply to depth blits as well, but we don't
1632          * implement those in the CPU blitter at the moment. */
1633         if ((dst_surface->flags & SFLAG_INSYSMEM)
1634                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1635         {
1636             if (scale)
1637                 TRACE("Not doing sysmem blit because of scaling.\n");
1638             else if (convert)
1639                 TRACE("Not doing sysmem blit because of format conversion.\n");
1640             else
1641                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1642         }
1643
1644         if (flags & WINEDDBLT_COLORFILL)
1645         {
1646             struct wined3d_color color;
1647
1648             TRACE("Color fill.\n");
1649
1650             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1651                 goto fallback;
1652
1653             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1654                 return WINED3D_OK;
1655         }
1656         else
1657         {
1658             TRACE("Color blit.\n");
1659
1660             /* Upload */
1661             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1662             {
1663                 if (scale)
1664                     TRACE("Not doing upload because of scaling.\n");
1665                 else if (convert)
1666                     TRACE("Not doing upload because of format conversion.\n");
1667                 else
1668                 {
1669                     POINT dst_point = {dst_rect.left, dst_rect.top};
1670
1671                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1672                     {
1673                         if (!surface_is_offscreen(dst_surface))
1674                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1675                         return WINED3D_OK;
1676                     }
1677                 }
1678             }
1679
1680             /* Use present for back -> front blits. The idea behind this is
1681              * that present is potentially faster than a blit, in particular
1682              * when FBO blits aren't available. Some ddraw applications like
1683              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1684              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1685              * applications can't blit directly to the frontbuffer. */
1686             if (dst_swapchain && dst_swapchain->back_buffers
1687                     && dst_surface == dst_swapchain->front_buffer
1688                     && src_surface == dst_swapchain->back_buffers[0])
1689             {
1690                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1691
1692                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1693
1694                 /* Set the swap effect to COPY, we don't want the backbuffer
1695                  * to become undefined. */
1696                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1697                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1698                 dst_swapchain->desc.swap_effect = swap_effect;
1699
1700                 return WINED3D_OK;
1701             }
1702
1703             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1704                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1705                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1706             {
1707                 TRACE("Using FBO blit.\n");
1708
1709                 surface_blt_fbo(device, filter,
1710                         src_surface, src_surface->draw_binding, &src_rect,
1711                         dst_surface, dst_surface->draw_binding, &dst_rect);
1712                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1713                 return WINED3D_OK;
1714             }
1715
1716             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1717                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1718                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1719             {
1720                 TRACE("Using arbfp blit.\n");
1721
1722                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1723                     return WINED3D_OK;
1724             }
1725         }
1726     }
1727
1728 fallback:
1729
1730     /* Special cases for render targets. */
1731     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1732             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1733     {
1734         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1735                 src_surface, &src_rect, flags, fx, filter)))
1736             return WINED3D_OK;
1737     }
1738
1739 cpu:
1740
1741     /* For the rest call the X11 surface implementation. For render targets
1742      * this should be implemented OpenGL accelerated in BltOverride, other
1743      * blits are rather rare. */
1744     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1745 }
1746
1747 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1748         struct wined3d_surface *render_target)
1749 {
1750     TRACE("surface %p, render_target %p.\n", surface, render_target);
1751
1752     /* TODO: Check surface sizes, pools, etc. */
1753
1754     if (render_target->resource.multisample_type)
1755         return WINED3DERR_INVALIDCALL;
1756
1757     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1758 }
1759
1760 /* Context activation is done by the caller. */
1761 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1762 {
1763     if (surface->flags & SFLAG_DIBSECTION)
1764     {
1765         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1766     }
1767     else
1768     {
1769         if (!surface->resource.heapMemory)
1770             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1771         else if (!(surface->flags & SFLAG_CLIENT))
1772             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1773                     surface, surface->resource.heapMemory, surface->flags);
1774
1775         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1776                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1777     }
1778
1779     ENTER_GL();
1780     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1781     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1782     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1783             surface->resource.size, surface->resource.allocatedMemory));
1784     checkGLcall("glGetBufferSubDataARB");
1785     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1786     checkGLcall("glDeleteBuffersARB");
1787     LEAVE_GL();
1788
1789     surface->pbo = 0;
1790     surface->flags &= ~SFLAG_PBO;
1791 }
1792
1793 /* Do not call while under the GL lock. */
1794 static void surface_unload(struct wined3d_resource *resource)
1795 {
1796     struct wined3d_surface *surface = surface_from_resource(resource);
1797     struct wined3d_renderbuffer_entry *entry, *entry2;
1798     struct wined3d_device *device = resource->device;
1799     const struct wined3d_gl_info *gl_info;
1800     struct wined3d_context *context;
1801
1802     TRACE("surface %p.\n", surface);
1803
1804     if (resource->pool == WINED3D_POOL_DEFAULT)
1805     {
1806         /* Default pool resources are supposed to be destroyed before Reset is called.
1807          * Implicit resources stay however. So this means we have an implicit render target
1808          * or depth stencil. The content may be destroyed, but we still have to tear down
1809          * opengl resources, so we cannot leave early.
1810          *
1811          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1812          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1813          * or the depth stencil into an FBO the texture or render buffer will be removed
1814          * and all flags get lost
1815          */
1816         if (!(surface->flags & SFLAG_PBO))
1817             surface_init_sysmem(surface);
1818         /* We also get here when the ddraw swapchain is destroyed, for example
1819          * for a mode switch. In this case this surface won't necessarily be
1820          * an implicit surface. We have to mark it lost so that the
1821          * application can restore it after the mode switch. */
1822         surface->flags |= SFLAG_LOST;
1823     }
1824     else
1825     {
1826         /* Load the surface into system memory */
1827         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1828         surface_modify_location(surface, surface->draw_binding, FALSE);
1829     }
1830     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1831     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1832     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1833
1834     context = context_acquire(device, NULL);
1835     gl_info = context->gl_info;
1836
1837     /* Destroy PBOs, but load them into real sysmem before */
1838     if (surface->flags & SFLAG_PBO)
1839         surface_remove_pbo(surface, gl_info);
1840
1841     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1842      * all application-created targets the application has to release the surface
1843      * before calling _Reset
1844      */
1845     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1846     {
1847         ENTER_GL();
1848         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1849         LEAVE_GL();
1850         list_remove(&entry->entry);
1851         HeapFree(GetProcessHeap(), 0, entry);
1852     }
1853     list_init(&surface->renderbuffers);
1854     surface->current_renderbuffer = NULL;
1855
1856     ENTER_GL();
1857
1858     /* If we're in a texture, the texture name belongs to the texture.
1859      * Otherwise, destroy it. */
1860     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1861     {
1862         glDeleteTextures(1, &surface->texture_name);
1863         surface->texture_name = 0;
1864         glDeleteTextures(1, &surface->texture_name_srgb);
1865         surface->texture_name_srgb = 0;
1866     }
1867     if (surface->rb_multisample)
1868     {
1869         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1870         surface->rb_multisample = 0;
1871     }
1872     if (surface->rb_resolved)
1873     {
1874         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1875         surface->rb_resolved = 0;
1876     }
1877
1878     LEAVE_GL();
1879
1880     context_release(context);
1881
1882     resource_unload(resource);
1883 }
1884
1885 static const struct wined3d_resource_ops surface_resource_ops =
1886 {
1887     surface_unload,
1888 };
1889
1890 static const struct wined3d_surface_ops surface_ops =
1891 {
1892     surface_private_setup,
1893     surface_realize_palette,
1894     surface_map,
1895     surface_unmap,
1896 };
1897
1898 /*****************************************************************************
1899  * Initializes the GDI surface, aka creates the DIB section we render to
1900  * The DIB section creation is done by calling GetDC, which will create the
1901  * section and releasing the dc to allow the app to use it. The dib section
1902  * will stay until the surface is released
1903  *
1904  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1905  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1906  * avoid confusion in the shared surface code.
1907  *
1908  * Returns:
1909  *  WINED3D_OK on success
1910  *  The return values of called methods on failure
1911  *
1912  *****************************************************************************/
1913 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1914 {
1915     HRESULT hr;
1916
1917     TRACE("surface %p.\n", surface);
1918
1919     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1920     {
1921         ERR("Overlays not yet supported by GDI surfaces.\n");
1922         return WINED3DERR_INVALIDCALL;
1923     }
1924
1925     /* Sysmem textures have memory already allocated - release it,
1926      * this avoids an unnecessary memcpy. */
1927     hr = surface_create_dib_section(surface);
1928     if (SUCCEEDED(hr))
1929     {
1930         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1931         surface->resource.heapMemory = NULL;
1932         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1933     }
1934
1935     /* We don't mind the nonpow2 stuff in GDI. */
1936     surface->pow2Width = surface->resource.width;
1937     surface->pow2Height = surface->resource.height;
1938
1939     return WINED3D_OK;
1940 }
1941
1942 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1943 {
1944     struct wined3d_palette *palette = surface->palette;
1945
1946     TRACE("surface %p.\n", surface);
1947
1948     if (!palette) return;
1949
1950     if (surface->flags & SFLAG_DIBSECTION)
1951     {
1952         RGBQUAD col[256];
1953         unsigned int i;
1954
1955         TRACE("Updating the DC's palette.\n");
1956
1957         for (i = 0; i < 256; ++i)
1958         {
1959             col[i].rgbRed = palette->palents[i].peRed;
1960             col[i].rgbGreen = palette->palents[i].peGreen;
1961             col[i].rgbBlue = palette->palents[i].peBlue;
1962             col[i].rgbReserved = 0;
1963         }
1964         SetDIBColorTable(surface->hDC, 0, 256, col);
1965     }
1966
1967     /* Update the image because of the palette change. Some games like e.g.
1968      * Red Alert call SetEntries a lot to implement fading. */
1969     /* Tell the swapchain to update the screen. */
1970     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1971     {
1972         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1973         if (surface == swapchain->front_buffer)
1974         {
1975             x11_copy_to_screen(swapchain, NULL);
1976         }
1977     }
1978 }
1979
1980 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1981 {
1982     TRACE("surface %p, rect %s, flags %#x.\n",
1983             surface, wine_dbgstr_rect(rect), flags);
1984
1985     if (!(surface->flags & SFLAG_DIBSECTION))
1986     {
1987         HRESULT hr;
1988
1989         /* This happens on gdi surfaces if the application set a user pointer
1990          * and resets it. Recreate the DIB section. */
1991         if (FAILED(hr = surface_create_dib_section(surface)))
1992         {
1993             ERR("Failed to create dib section, hr %#x.\n", hr);
1994             return;
1995         }
1996         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1997         surface->resource.heapMemory = NULL;
1998         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1999     }
2000 }
2001
2002 static void gdi_surface_unmap(struct wined3d_surface *surface)
2003 {
2004     TRACE("surface %p.\n", surface);
2005
2006     /* Tell the swapchain to update the screen. */
2007     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2008     {
2009         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2010         if (surface == swapchain->front_buffer)
2011         {
2012             x11_copy_to_screen(swapchain, &surface->lockedRect);
2013         }
2014     }
2015
2016     memset(&surface->lockedRect, 0, sizeof(RECT));
2017 }
2018
2019 static const struct wined3d_surface_ops gdi_surface_ops =
2020 {
2021     gdi_surface_private_setup,
2022     gdi_surface_realize_palette,
2023     gdi_surface_map,
2024     gdi_surface_unmap,
2025 };
2026
2027 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2028 {
2029     GLuint *name;
2030     DWORD flag;
2031
2032     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2033
2034     if(srgb)
2035     {
2036         name = &surface->texture_name_srgb;
2037         flag = SFLAG_INSRGBTEX;
2038     }
2039     else
2040     {
2041         name = &surface->texture_name;
2042         flag = SFLAG_INTEXTURE;
2043     }
2044
2045     if (!*name && new_name)
2046     {
2047         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2048          * surface has no texture name yet. See if we can get rid of this. */
2049         if (surface->flags & flag)
2050         {
2051             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2052             surface_modify_location(surface, flag, FALSE);
2053         }
2054     }
2055
2056     *name = new_name;
2057     surface_force_reload(surface);
2058 }
2059
2060 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2061 {
2062     TRACE("surface %p, target %#x.\n", surface, target);
2063
2064     if (surface->texture_target != target)
2065     {
2066         if (target == GL_TEXTURE_RECTANGLE_ARB)
2067         {
2068             surface->flags &= ~SFLAG_NORMCOORD;
2069         }
2070         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2071         {
2072             surface->flags |= SFLAG_NORMCOORD;
2073         }
2074     }
2075     surface->texture_target = target;
2076     surface_force_reload(surface);
2077 }
2078
2079 /* Context activation is done by the caller. */
2080 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2081 {
2082     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2083
2084     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2085     {
2086         struct wined3d_texture *texture = surface->container.u.texture;
2087
2088         TRACE("Passing to container (%p).\n", texture);
2089         texture->texture_ops->texture_bind(texture, context, srgb);
2090     }
2091     else
2092     {
2093         if (surface->texture_level)
2094         {
2095             ERR("Standalone surface %p is non-zero texture level %u.\n",
2096                     surface, surface->texture_level);
2097         }
2098
2099         if (srgb)
2100             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2101
2102         ENTER_GL();
2103
2104         if (!surface->texture_name)
2105         {
2106             glGenTextures(1, &surface->texture_name);
2107             checkGLcall("glGenTextures");
2108
2109             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2110
2111             context_bind_texture(context, surface->texture_target, surface->texture_name);
2112             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2113             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2114             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2115             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2116             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2117             checkGLcall("glTexParameteri");
2118         }
2119         else
2120         {
2121             context_bind_texture(context, surface->texture_target, surface->texture_name);
2122         }
2123
2124         LEAVE_GL();
2125     }
2126 }
2127
2128 /* This call just downloads data, the caller is responsible for binding the
2129  * correct texture. */
2130 /* Context activation is done by the caller. */
2131 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2132 {
2133     const struct wined3d_format *format = surface->resource.format;
2134
2135     /* Only support read back of converted P8 surfaces. */
2136     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2137     {
2138         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2139         return;
2140     }
2141
2142     ENTER_GL();
2143
2144     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2145     {
2146         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2147                 surface, surface->texture_level, format->glFormat, format->glType,
2148                 surface->resource.allocatedMemory);
2149
2150         if (surface->flags & SFLAG_PBO)
2151         {
2152             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2153             checkGLcall("glBindBufferARB");
2154             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2155             checkGLcall("glGetCompressedTexImageARB");
2156             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2157             checkGLcall("glBindBufferARB");
2158         }
2159         else
2160         {
2161             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2162                     surface->texture_level, surface->resource.allocatedMemory));
2163             checkGLcall("glGetCompressedTexImageARB");
2164         }
2165
2166         LEAVE_GL();
2167     }
2168     else
2169     {
2170         void *mem;
2171         GLenum gl_format = format->glFormat;
2172         GLenum gl_type = format->glType;
2173         int src_pitch = 0;
2174         int dst_pitch = 0;
2175
2176         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2177         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2178         {
2179             gl_format = GL_ALPHA;
2180             gl_type = GL_UNSIGNED_BYTE;
2181         }
2182
2183         if (surface->flags & SFLAG_NONPOW2)
2184         {
2185             unsigned char alignment = surface->resource.device->surface_alignment;
2186             src_pitch = format->byte_count * surface->pow2Width;
2187             dst_pitch = wined3d_surface_get_pitch(surface);
2188             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2189             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2190         }
2191         else
2192         {
2193             mem = surface->resource.allocatedMemory;
2194         }
2195
2196         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2197                 surface, surface->texture_level, gl_format, gl_type, mem);
2198
2199         if (surface->flags & SFLAG_PBO)
2200         {
2201             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2202             checkGLcall("glBindBufferARB");
2203
2204             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2205             checkGLcall("glGetTexImage");
2206
2207             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2208             checkGLcall("glBindBufferARB");
2209         }
2210         else
2211         {
2212             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2213             checkGLcall("glGetTexImage");
2214         }
2215         LEAVE_GL();
2216
2217         if (surface->flags & SFLAG_NONPOW2)
2218         {
2219             const BYTE *src_data;
2220             BYTE *dst_data;
2221             UINT y;
2222             /*
2223              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2224              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2225              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2226              *
2227              * We're doing this...
2228              *
2229              * instead of boxing the texture :
2230              * |<-texture width ->|  -->pow2width|   /\
2231              * |111111111111111111|              |   |
2232              * |222 Texture 222222| boxed empty  | texture height
2233              * |3333 Data 33333333|              |   |
2234              * |444444444444444444|              |   \/
2235              * -----------------------------------   |
2236              * |     boxed  empty | boxed empty  | pow2height
2237              * |                  |              |   \/
2238              * -----------------------------------
2239              *
2240              *
2241              * we're repacking the data to the expected texture width
2242              *
2243              * |<-texture width ->|  -->pow2width|   /\
2244              * |111111111111111111222222222222222|   |
2245              * |222333333333333333333444444444444| texture height
2246              * |444444                           |   |
2247              * |                                 |   \/
2248              * |                                 |   |
2249              * |            empty                | pow2height
2250              * |                                 |   \/
2251              * -----------------------------------
2252              *
2253              * == is the same as
2254              *
2255              * |<-texture width ->|    /\
2256              * |111111111111111111|
2257              * |222222222222222222|texture height
2258              * |333333333333333333|
2259              * |444444444444444444|    \/
2260              * --------------------
2261              *
2262              * this also means that any references to allocatedMemory should work with the data as if were a
2263              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2264              *
2265              * internally the texture is still stored in a boxed format so any references to textureName will
2266              * get a boxed texture with width pow2width and not a texture of width resource.width.
2267              *
2268              * Performance should not be an issue, because applications normally do not lock the surfaces when
2269              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2270              * and doesn't have to be re-read. */
2271             src_data = mem;
2272             dst_data = surface->resource.allocatedMemory;
2273             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2274             for (y = 1; y < surface->resource.height; ++y)
2275             {
2276                 /* skip the first row */
2277                 src_data += src_pitch;
2278                 dst_data += dst_pitch;
2279                 memcpy(dst_data, src_data, dst_pitch);
2280             }
2281
2282             HeapFree(GetProcessHeap(), 0, mem);
2283         }
2284     }
2285
2286     /* Surface has now been downloaded */
2287     surface->flags |= SFLAG_INSYSMEM;
2288 }
2289
2290 /* This call just uploads data, the caller is responsible for binding the
2291  * correct texture. */
2292 /* Context activation is done by the caller. */
2293 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2294         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2295         BOOL srgb, const struct wined3d_bo_address *data)
2296 {
2297     UINT update_w = src_rect->right - src_rect->left;
2298     UINT update_h = src_rect->bottom - src_rect->top;
2299
2300     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2301             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2302             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2303
2304     if (surface->flags & SFLAG_LOCKED)
2305     {
2306         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2307         surface->flags |= SFLAG_PIN_SYSMEM;
2308     }
2309
2310     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2311         update_h *= format->heightscale;
2312
2313     ENTER_GL();
2314
2315     if (data->buffer_object)
2316     {
2317         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2318         checkGLcall("glBindBufferARB");
2319     }
2320
2321     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2322     {
2323         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2324         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2325         const BYTE *addr = data->addr;
2326         GLenum internal;
2327
2328         addr += (src_rect->top / format->block_height) * src_pitch;
2329         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2330
2331         if (srgb)
2332             internal = format->glGammaInternal;
2333         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2334             internal = format->rtInternal;
2335         else
2336             internal = format->glInternal;
2337
2338         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2339                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2340                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2341
2342         if (row_length == src_pitch)
2343         {
2344             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2345                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2346         }
2347         else
2348         {
2349             UINT row, y;
2350
2351             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2352              * can't use the unpack row length like below. */
2353             for (row = 0, y = dst_point->y; row < row_count; ++row)
2354             {
2355                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2356                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2357                 y += format->block_height;
2358                 addr += src_pitch;
2359             }
2360         }
2361         checkGLcall("glCompressedTexSubImage2DARB");
2362     }
2363     else
2364     {
2365         const BYTE *addr = data->addr;
2366
2367         addr += src_rect->top * src_pitch;
2368         addr += src_rect->left * format->byte_count;
2369
2370         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2371                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2372                 update_w, update_h, format->glFormat, format->glType, addr);
2373
2374         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2375         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2376                 update_w, update_h, format->glFormat, format->glType, addr);
2377         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2378         checkGLcall("glTexSubImage2D");
2379     }
2380
2381     if (data->buffer_object)
2382     {
2383         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2384         checkGLcall("glBindBufferARB");
2385     }
2386
2387     LEAVE_GL();
2388
2389     if (wined3d_settings.strict_draw_ordering)
2390         wglFlush();
2391
2392     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2393     {
2394         struct wined3d_device *device = surface->resource.device;
2395         unsigned int i;
2396
2397         for (i = 0; i < device->context_count; ++i)
2398         {
2399             context_surface_update(device->contexts[i], surface);
2400         }
2401     }
2402 }
2403
2404 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2405         struct wined3d_surface *src_surface, const RECT *src_rect)
2406 {
2407     const struct wined3d_format *src_format;
2408     const struct wined3d_format *dst_format;
2409     const struct wined3d_gl_info *gl_info;
2410     struct wined3d_context *context;
2411     struct wined3d_bo_address data;
2412     struct wined3d_format format;
2413     UINT update_w, update_h;
2414     CONVERT_TYPES convert;
2415     UINT dst_w, dst_h;
2416     UINT src_w, src_h;
2417     UINT src_pitch;
2418     POINT p;
2419     RECT r;
2420
2421     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2422             dst_surface, wine_dbgstr_point(dst_point),
2423             src_surface, wine_dbgstr_rect(src_rect));
2424
2425     src_format = src_surface->resource.format;
2426     dst_format = dst_surface->resource.format;
2427
2428     if (src_format->id != dst_format->id)
2429     {
2430         WARN("Source and destination surfaces should have the same format.\n");
2431         return WINED3DERR_INVALIDCALL;
2432     }
2433
2434     if (!dst_point)
2435     {
2436         p.x = 0;
2437         p.y = 0;
2438         dst_point = &p;
2439     }
2440     else if (dst_point->x < 0 || dst_point->y < 0)
2441     {
2442         WARN("Invalid destination point.\n");
2443         return WINED3DERR_INVALIDCALL;
2444     }
2445
2446     if (!src_rect)
2447     {
2448         r.left = 0;
2449         r.top = 0;
2450         r.right = src_surface->resource.width;
2451         r.bottom = src_surface->resource.height;
2452         src_rect = &r;
2453     }
2454     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2455             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2456     {
2457         WARN("Invalid source rectangle.\n");
2458         return WINED3DERR_INVALIDCALL;
2459     }
2460
2461     src_w = src_surface->resource.width;
2462     src_h = src_surface->resource.height;
2463
2464     dst_w = dst_surface->resource.width;
2465     dst_h = dst_surface->resource.height;
2466
2467     update_w = src_rect->right - src_rect->left;
2468     update_h = src_rect->bottom - src_rect->top;
2469
2470     if (update_w > dst_w || dst_point->x > dst_w - update_w
2471             || update_h > dst_h || dst_point->y > dst_h - update_h)
2472     {
2473         WARN("Destination out of bounds.\n");
2474         return WINED3DERR_INVALIDCALL;
2475     }
2476
2477     /* NPOT block sizes would be silly. */
2478     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2479             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2480             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2481     {
2482         WARN("Update rect not block-aligned.\n");
2483         return WINED3DERR_INVALIDCALL;
2484     }
2485
2486     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2487     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2488     if (convert != NO_CONVERSION || format.convert)
2489     {
2490         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2491         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2492     }
2493
2494     context = context_acquire(dst_surface->resource.device, NULL);
2495     gl_info = context->gl_info;
2496
2497     /* Only load the surface for partial updates. For newly allocated texture
2498      * the texture wouldn't be the current location, and we'd upload zeroes
2499      * just to overwrite them again. */
2500     if (update_w == dst_w && update_h == dst_h)
2501         surface_prepare_texture(dst_surface, context, FALSE);
2502     else
2503         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2504     surface_bind(dst_surface, context, FALSE);
2505
2506     data.buffer_object = src_surface->pbo;
2507     data.addr = src_surface->resource.allocatedMemory;
2508     src_pitch = wined3d_surface_get_pitch(src_surface);
2509
2510     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2511
2512     invalidate_active_texture(dst_surface->resource.device, context);
2513
2514     context_release(context);
2515
2516     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2517     return WINED3D_OK;
2518 }
2519
2520 /* This call just allocates the texture, the caller is responsible for binding
2521  * the correct texture. */
2522 /* Context activation is done by the caller. */
2523 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2524         const struct wined3d_format *format, BOOL srgb)
2525 {
2526     BOOL enable_client_storage = FALSE;
2527     GLsizei width = surface->pow2Width;
2528     GLsizei height = surface->pow2Height;
2529     const BYTE *mem = NULL;
2530     GLenum internal;
2531
2532     if (srgb)
2533     {
2534         internal = format->glGammaInternal;
2535     }
2536     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2537     {
2538         internal = format->rtInternal;
2539     }
2540     else
2541     {
2542         internal = format->glInternal;
2543     }
2544
2545     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2546
2547     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2548             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2549             internal, width, height, format->glFormat, format->glType);
2550
2551     ENTER_GL();
2552
2553     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2554     {
2555         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2556                 || !surface->resource.allocatedMemory)
2557         {
2558             /* In some cases we want to disable client storage.
2559              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2560              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2561              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2562              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2563              */
2564             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2565             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2566             surface->flags &= ~SFLAG_CLIENT;
2567             enable_client_storage = TRUE;
2568         }
2569         else
2570         {
2571             surface->flags |= SFLAG_CLIENT;
2572
2573             /* Point OpenGL to our allocated texture memory. Do not use
2574              * resource.allocatedMemory here because it might point into a
2575              * PBO. Instead use heapMemory, but get the alignment right. */
2576             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2577                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2578         }
2579     }
2580
2581     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2582     {
2583         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2584                 internal, width, height, 0, surface->resource.size, mem));
2585         checkGLcall("glCompressedTexImage2DARB");
2586     }
2587     else
2588     {
2589         glTexImage2D(surface->texture_target, surface->texture_level,
2590                 internal, width, height, 0, format->glFormat, format->glType, mem);
2591         checkGLcall("glTexImage2D");
2592     }
2593
2594     if(enable_client_storage) {
2595         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2596         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2597     }
2598     LEAVE_GL();
2599 }
2600
2601 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2602  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2603 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2604 /* GL locking is done by the caller */
2605 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2606 {
2607     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2608     struct wined3d_renderbuffer_entry *entry;
2609     GLuint renderbuffer = 0;
2610     unsigned int src_width, src_height;
2611     unsigned int width, height;
2612
2613     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2614     {
2615         width = rt->pow2Width;
2616         height = rt->pow2Height;
2617     }
2618     else
2619     {
2620         width = surface->pow2Width;
2621         height = surface->pow2Height;
2622     }
2623
2624     src_width = surface->pow2Width;
2625     src_height = surface->pow2Height;
2626
2627     /* A depth stencil smaller than the render target is not valid */
2628     if (width > src_width || height > src_height) return;
2629
2630     /* Remove any renderbuffer set if the sizes match */
2631     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2632             || (width == src_width && height == src_height))
2633     {
2634         surface->current_renderbuffer = NULL;
2635         return;
2636     }
2637
2638     /* Look if we've already got a renderbuffer of the correct dimensions */
2639     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2640     {
2641         if (entry->width == width && entry->height == height)
2642         {
2643             renderbuffer = entry->id;
2644             surface->current_renderbuffer = entry;
2645             break;
2646         }
2647     }
2648
2649     if (!renderbuffer)
2650     {
2651         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2652         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2653         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2654                 surface->resource.format->glInternal, width, height);
2655
2656         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2657         entry->width = width;
2658         entry->height = height;
2659         entry->id = renderbuffer;
2660         list_add_head(&surface->renderbuffers, &entry->entry);
2661
2662         surface->current_renderbuffer = entry;
2663     }
2664
2665     checkGLcall("set_compatible_renderbuffer");
2666 }
2667
2668 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2669 {
2670     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2671
2672     TRACE("surface %p.\n", surface);
2673
2674     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2675     {
2676         ERR("Surface %p is not on a swapchain.\n", surface);
2677         return GL_NONE;
2678     }
2679
2680     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2681     {
2682         if (swapchain->render_to_fbo)
2683         {
2684             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2685             return GL_COLOR_ATTACHMENT0;
2686         }
2687         TRACE("Returning GL_BACK\n");
2688         return GL_BACK;
2689     }
2690     else if (surface == swapchain->front_buffer)
2691     {
2692         TRACE("Returning GL_FRONT\n");
2693         return GL_FRONT;
2694     }
2695
2696     FIXME("Higher back buffer, returning GL_BACK\n");
2697     return GL_BACK;
2698 }
2699
2700 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2701 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2702 {
2703     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2704
2705     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2706         /* No partial locking for textures yet. */
2707         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2708
2709     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2710     if (dirty_rect)
2711     {
2712         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2713         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2714         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2715         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2716     }
2717     else
2718     {
2719         surface->dirtyRect.left = 0;
2720         surface->dirtyRect.top = 0;
2721         surface->dirtyRect.right = surface->resource.width;
2722         surface->dirtyRect.bottom = surface->resource.height;
2723     }
2724
2725     /* if the container is a texture then mark it dirty. */
2726     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2727     {
2728         TRACE("Passing to container.\n");
2729         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2730     }
2731 }
2732
2733 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2734 {
2735     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2736     BOOL ck_changed;
2737
2738     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2739
2740     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2741     {
2742         ERR("Not supported on scratch surfaces.\n");
2743         return WINED3DERR_INVALIDCALL;
2744     }
2745
2746     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2747
2748     /* Reload if either the texture and sysmem have different ideas about the
2749      * color key, or the actual key values changed. */
2750     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2751             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2752             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2753     {
2754         TRACE("Reloading because of color keying\n");
2755         /* To perform the color key conversion we need a sysmem copy of
2756          * the surface. Make sure we have it. */
2757
2758         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2759         /* Make sure the texture is reloaded because of the color key change,
2760          * this kills performance though :( */
2761         /* TODO: This is not necessarily needed with hw palettized texture support. */
2762         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2763         /* Switching color keying on / off may change the internal format. */
2764         if (ck_changed)
2765             surface_force_reload(surface);
2766     }
2767     else if (!(surface->flags & flag))
2768     {
2769         TRACE("Reloading because surface is dirty.\n");
2770     }
2771     else
2772     {
2773         TRACE("surface is already in texture\n");
2774         return WINED3D_OK;
2775     }
2776
2777     /* No partial locking for textures yet. */
2778     surface_load_location(surface, flag, NULL);
2779     surface_evict_sysmem(surface);
2780
2781     return WINED3D_OK;
2782 }
2783
/* Convert a 32 bit float to the bit pattern of a 16 bit (half precision)
 * float: 1 sign bit, 5 exponent bits with excess 15, 10 mantissa bits.
 *
 * in: pointer to the value to convert.
 *
 * Zero (of either sign) yields 0x0000, NaN yields 0x7c01, infinities and
 * values too large for the format yield a signed infinity, values too
 * small for a normalized result yield a denormal or zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), so that its integer part is the
     * 11 bit mantissa including the implicit leading one. The bounds are
     * exact float constants, no need to call powf() on every iteration. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        --exp;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        ++exp;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 bit mantissa (e.g. 2047.5 rounds
     * to 2048). Renormalize, otherwise masking with 0x3ff below would drop
     * the carry and roughly halve the result. */
    if (mantissa == 0x800)
    {
        mantissa >>= 1;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2846
2847 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2848 {
2849     ULONG refcount;
2850
2851     TRACE("Surface %p, container %p of type %#x.\n",
2852             surface, surface->container.u.base, surface->container.type);
2853
2854     switch (surface->container.type)
2855     {
2856         case WINED3D_CONTAINER_TEXTURE:
2857             return wined3d_texture_incref(surface->container.u.texture);
2858
2859         case WINED3D_CONTAINER_SWAPCHAIN:
2860             return wined3d_swapchain_incref(surface->container.u.swapchain);
2861
2862         default:
2863             ERR("Unhandled container type %#x.\n", surface->container.type);
2864         case WINED3D_CONTAINER_NONE:
2865             break;
2866     }
2867
2868     refcount = InterlockedIncrement(&surface->resource.ref);
2869     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2870
2871     return refcount;
2872 }
2873
2874 /* Do not call while under the GL lock. */
2875 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2876 {
2877     ULONG refcount;
2878
2879     TRACE("Surface %p, container %p of type %#x.\n",
2880             surface, surface->container.u.base, surface->container.type);
2881
2882     switch (surface->container.type)
2883     {
2884         case WINED3D_CONTAINER_TEXTURE:
2885             return wined3d_texture_decref(surface->container.u.texture);
2886
2887         case WINED3D_CONTAINER_SWAPCHAIN:
2888             return wined3d_swapchain_decref(surface->container.u.swapchain);
2889
2890         default:
2891             ERR("Unhandled container type %#x.\n", surface->container.type);
2892         case WINED3D_CONTAINER_NONE:
2893             break;
2894     }
2895
2896     refcount = InterlockedDecrement(&surface->resource.ref);
2897     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2898
2899     if (!refcount)
2900     {
2901         surface_cleanup(surface);
2902         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2903
2904         TRACE("Destroyed surface %p.\n", surface);
2905         HeapFree(GetProcessHeap(), 0, surface);
2906     }
2907
2908     return refcount;
2909 }
2910
2911 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2912 {
2913     return resource_set_priority(&surface->resource, priority);
2914 }
2915
2916 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2917 {
2918     return resource_get_priority(&surface->resource);
2919 }
2920
2921 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2922 {
2923     TRACE("surface %p.\n", surface);
2924
2925     if (!surface->resource.device->d3d_initialized)
2926     {
2927         ERR("D3D not initialized.\n");
2928         return;
2929     }
2930
2931     surface_internal_preload(surface, SRGB_ANY);
2932 }
2933
2934 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2935 {
2936     TRACE("surface %p.\n", surface);
2937
2938     return surface->resource.parent;
2939 }
2940
2941 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2942 {
2943     TRACE("surface %p.\n", surface);
2944
2945     return &surface->resource;
2946 }
2947
2948 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2949 {
2950     TRACE("surface %p, flags %#x.\n", surface, flags);
2951
2952     switch (flags)
2953     {
2954         case WINEDDGBS_CANBLT:
2955         case WINEDDGBS_ISBLTDONE:
2956             return WINED3D_OK;
2957
2958         default:
2959             return WINED3DERR_INVALIDCALL;
2960     }
2961 }
2962
2963 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2964 {
2965     TRACE("surface %p, flags %#x.\n", surface, flags);
2966
2967     /* XXX: DDERR_INVALIDSURFACETYPE */
2968
2969     switch (flags)
2970     {
2971         case WINEDDGFS_CANFLIP:
2972         case WINEDDGFS_ISFLIPDONE:
2973             return WINED3D_OK;
2974
2975         default:
2976             return WINED3DERR_INVALIDCALL;
2977     }
2978 }
2979
2980 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2981 {
2982     TRACE("surface %p.\n", surface);
2983
2984     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2985     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2986 }
2987
2988 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2989 {
2990     TRACE("surface %p.\n", surface);
2991
2992     surface->flags &= ~SFLAG_LOST;
2993     return WINED3D_OK;
2994 }
2995
2996 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2997 {
2998     TRACE("surface %p, palette %p.\n", surface, palette);
2999
3000     if (surface->palette == palette)
3001     {
3002         TRACE("Nop palette change.\n");
3003         return WINED3D_OK;
3004     }
3005
3006     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3007         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3008
3009     surface->palette = palette;
3010
3011     if (palette)
3012     {
3013         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3014             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3015
3016         surface->surface_ops->surface_realize_palette(surface);
3017     }
3018
3019     return WINED3D_OK;
3020 }
3021
3022 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3023         DWORD flags, const struct wined3d_color_key *color_key)
3024 {
3025     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3026
3027     if (flags & WINEDDCKEY_COLORSPACE)
3028     {
3029         FIXME(" colorkey value not supported (%08x) !\n", flags);
3030         return WINED3DERR_INVALIDCALL;
3031     }
3032
3033     /* Dirtify the surface, but only if a key was changed. */
3034     if (color_key)
3035     {
3036         switch (flags & ~WINEDDCKEY_COLORSPACE)
3037         {
3038             case WINEDDCKEY_DESTBLT:
3039                 surface->dst_blt_color_key = *color_key;
3040                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3041                 break;
3042
3043             case WINEDDCKEY_DESTOVERLAY:
3044                 surface->dst_overlay_color_key = *color_key;
3045                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3046                 break;
3047
3048             case WINEDDCKEY_SRCOVERLAY:
3049                 surface->src_overlay_color_key = *color_key;
3050                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3051                 break;
3052
3053             case WINEDDCKEY_SRCBLT:
3054                 surface->src_blt_color_key = *color_key;
3055                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3056                 break;
3057         }
3058     }
3059     else
3060     {
3061         switch (flags & ~WINEDDCKEY_COLORSPACE)
3062         {
3063             case WINEDDCKEY_DESTBLT:
3064                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3065                 break;
3066
3067             case WINEDDCKEY_DESTOVERLAY:
3068                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3069                 break;
3070
3071             case WINEDDCKEY_SRCOVERLAY:
3072                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3073                 break;
3074
3075             case WINEDDCKEY_SRCBLT:
3076                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3077                 break;
3078         }
3079     }
3080
3081     return WINED3D_OK;
3082 }
3083
3084 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3085 {
3086     TRACE("surface %p.\n", surface);
3087
3088     return surface->palette;
3089 }
3090
3091 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3092 {
3093     const struct wined3d_format *format = surface->resource.format;
3094     DWORD pitch;
3095
3096     TRACE("surface %p.\n", surface);
3097
3098     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3099     {
3100         /* Since compressed formats are block based, pitch means the amount of
3101          * bytes to the next row of block rather than the next row of pixels. */
3102         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3103         pitch = row_block_count * format->block_byte_count;
3104     }
3105     else
3106     {
3107         unsigned char alignment = surface->resource.device->surface_alignment;
3108         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3109         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3110     }
3111
3112     TRACE("Returning %u.\n", pitch);
3113
3114     return pitch;
3115 }
3116
3117 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3118 {
3119     TRACE("surface %p, mem %p.\n", surface, mem);
3120
3121     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3122     {
3123         WARN("Surface is locked or the DC is in use.\n");
3124         return WINED3DERR_INVALIDCALL;
3125     }
3126
3127     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3128     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3129     {
3130         ERR("Not supported on render targets.\n");
3131         return WINED3DERR_INVALIDCALL;
3132     }
3133
3134     if (mem && mem != surface->resource.allocatedMemory)
3135     {
3136         void *release = NULL;
3137
3138         /* Do I have to copy the old surface content? */
3139         if (surface->flags & SFLAG_DIBSECTION)
3140         {
3141             DeleteDC(surface->hDC);
3142             DeleteObject(surface->dib.DIBsection);
3143             surface->dib.bitmap_data = NULL;
3144             surface->resource.allocatedMemory = NULL;
3145             surface->hDC = NULL;
3146             surface->flags &= ~SFLAG_DIBSECTION;
3147         }
3148         else if (!(surface->flags & SFLAG_USERPTR))
3149         {
3150             release = surface->resource.heapMemory;
3151             surface->resource.heapMemory = NULL;
3152         }
3153         surface->resource.allocatedMemory = mem;
3154         surface->flags |= SFLAG_USERPTR;
3155
3156         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3157         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3158
3159         /* For client textures OpenGL has to be notified. */
3160         if (surface->flags & SFLAG_CLIENT)
3161             surface_release_client_storage(surface);
3162
3163         /* Now free the old memory if any. */
3164         HeapFree(GetProcessHeap(), 0, release);
3165     }
3166     else if (surface->flags & SFLAG_USERPTR)
3167     {
3168         /* HeapMemory should be NULL already. */
3169         if (surface->resource.heapMemory)
3170             ERR("User pointer surface has heap memory allocated.\n");
3171
3172         if (!mem)
3173         {
3174             surface->resource.allocatedMemory = NULL;
3175             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3176
3177             if (surface->flags & SFLAG_CLIENT)
3178                 surface_release_client_storage(surface);
3179
3180             surface_prepare_system_memory(surface);
3181         }
3182
3183         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3184     }
3185
3186     return WINED3D_OK;
3187 }
3188
3189 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3190 {
3191     LONG w, h;
3192
3193     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3194
3195     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3196     {
3197         WARN("Not an overlay surface.\n");
3198         return WINEDDERR_NOTAOVERLAYSURFACE;
3199     }
3200
3201     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3202     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3203     surface->overlay_destrect.left = x;
3204     surface->overlay_destrect.top = y;
3205     surface->overlay_destrect.right = x + w;
3206     surface->overlay_destrect.bottom = y + h;
3207
3208     surface_draw_overlay(surface);
3209
3210     return WINED3D_OK;
3211 }
3212
3213 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3214 {
3215     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3216
3217     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3218     {
3219         TRACE("Not an overlay surface.\n");
3220         return WINEDDERR_NOTAOVERLAYSURFACE;
3221     }
3222
3223     if (!surface->overlay_dest)
3224     {
3225         TRACE("Overlay not visible.\n");
3226         *x = 0;
3227         *y = 0;
3228         return WINEDDERR_OVERLAYNOTVISIBLE;
3229     }
3230
3231     *x = surface->overlay_destrect.left;
3232     *y = surface->overlay_destrect.top;
3233
3234     TRACE("Returning position %d, %d.\n", *x, *y);
3235
3236     return WINED3D_OK;
3237 }
3238
3239 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3240         DWORD flags, struct wined3d_surface *ref)
3241 {
3242     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3243
3244     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3245     {
3246         TRACE("Not an overlay surface.\n");
3247         return WINEDDERR_NOTAOVERLAYSURFACE;
3248     }
3249
3250     return WINED3D_OK;
3251 }
3252
3253 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3254         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3255 {
3256     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3257             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3258
3259     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3260     {
3261         WARN("Not an overlay surface.\n");
3262         return WINEDDERR_NOTAOVERLAYSURFACE;
3263     }
3264     else if (!dst_surface)
3265     {
3266         WARN("Dest surface is NULL.\n");
3267         return WINED3DERR_INVALIDCALL;
3268     }
3269
3270     if (src_rect)
3271     {
3272         surface->overlay_srcrect = *src_rect;
3273     }
3274     else
3275     {
3276         surface->overlay_srcrect.left = 0;
3277         surface->overlay_srcrect.top = 0;
3278         surface->overlay_srcrect.right = surface->resource.width;
3279         surface->overlay_srcrect.bottom = surface->resource.height;
3280     }
3281
3282     if (dst_rect)
3283     {
3284         surface->overlay_destrect = *dst_rect;
3285     }
3286     else
3287     {
3288         surface->overlay_destrect.left = 0;
3289         surface->overlay_destrect.top = 0;
3290         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3291         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3292     }
3293
3294     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3295     {
3296         surface->overlay_dest = NULL;
3297         list_remove(&surface->overlay_entry);
3298     }
3299
3300     if (flags & WINEDDOVER_SHOW)
3301     {
3302         if (surface->overlay_dest != dst_surface)
3303         {
3304             surface->overlay_dest = dst_surface;
3305             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3306         }
3307     }
3308     else if (flags & WINEDDOVER_HIDE)
3309     {
3310         /* tests show that the rectangles are erased on hide */
3311         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3312         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3313         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3314         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3315         surface->overlay_dest = NULL;
3316     }
3317
3318     surface_draw_overlay(surface);
3319
3320     return WINED3D_OK;
3321 }
3322
3323 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3324         UINT width, UINT height, enum wined3d_format_id format_id,
3325         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3326 {
3327     struct wined3d_device *device = surface->resource.device;
3328     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3329     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3330     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3331
3332     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3333             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3334
3335     if (!resource_size)
3336         return WINED3DERR_INVALIDCALL;
3337
3338     if (device->d3d_initialized)
3339         surface->resource.resource_ops->resource_unload(&surface->resource);
3340
3341     if (surface->flags & SFLAG_DIBSECTION)
3342     {
3343         DeleteDC(surface->hDC);
3344         DeleteObject(surface->dib.DIBsection);
3345         surface->dib.bitmap_data = NULL;
3346         surface->flags &= ~SFLAG_DIBSECTION;
3347     }
3348
3349     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3350     surface->resource.allocatedMemory = NULL;
3351     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3352     surface->resource.heapMemory = NULL;
3353
3354     surface->resource.width = width;
3355     surface->resource.height = height;
3356     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3357             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3358     {
3359         surface->pow2Width = width;
3360         surface->pow2Height = height;
3361     }
3362     else
3363     {
3364         surface->pow2Width = surface->pow2Height = 1;
3365         while (surface->pow2Width < width)
3366             surface->pow2Width <<= 1;
3367         while (surface->pow2Height < height)
3368             surface->pow2Height <<= 1;
3369     }
3370
3371     if (surface->pow2Width != width || surface->pow2Height != height)
3372         surface->flags |= SFLAG_NONPOW2;
3373     else
3374         surface->flags &= ~SFLAG_NONPOW2;
3375
3376     surface->resource.format = format;
3377     surface->resource.multisample_type = multisample_type;
3378     surface->resource.multisample_quality = multisample_quality;
3379     surface->resource.size = resource_size;
3380
3381     if (!surface_init_sysmem(surface))
3382         return E_OUTOFMEMORY;
3383
3384     return WINED3D_OK;
3385 }
3386
3387 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3388         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3389 {
3390     unsigned short *dst_s;
3391     const float *src_f;
3392     unsigned int x, y;
3393
3394     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3395
3396     for (y = 0; y < h; ++y)
3397     {
3398         src_f = (const float *)(src + y * pitch_in);
3399         dst_s = (unsigned short *) (dst + y * pitch_out);
3400         for (x = 0; x < w; ++x)
3401         {
3402             dst_s[x] = float_32_to_16(src_f + x);
3403         }
3404     }
3405 }
3406
3407 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3408         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3409 {
3410     static const unsigned char convert_5to8[] =
3411     {
3412         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3413         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3414         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3415         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3416     };
3417     static const unsigned char convert_6to8[] =
3418     {
3419         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3420         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3421         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3422         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3423         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3424         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3425         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3426         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3427     };
3428     unsigned int x, y;
3429
3430     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3431
3432     for (y = 0; y < h; ++y)
3433     {
3434         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3435         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3436         for (x = 0; x < w; ++x)
3437         {
3438             WORD pixel = src_line[x];
3439             dst_line[x] = 0xff000000
3440                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3441                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3442                     | convert_5to8[(pixel & 0x001f)];
3443         }
3444     }
3445 }
3446
3447 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3448  * in both cases we're just setting the X / Alpha channel to 0xff. */
3449 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3450         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3451 {
3452     unsigned int x, y;
3453
3454     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3455
3456     for (y = 0; y < h; ++y)
3457     {
3458         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3459         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3460
3461         for (x = 0; x < w; ++x)
3462         {
3463             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3464         }
3465     }
3466 }
3467
3468 static inline BYTE cliptobyte(int x)
3469 {
3470     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3471 }
3472
3473 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3474         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3475 {
3476     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3477     unsigned int x, y;
3478
3479     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3480
3481     for (y = 0; y < h; ++y)
3482     {
3483         const BYTE *src_line = src + y * pitch_in;
3484         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3485         for (x = 0; x < w; ++x)
3486         {
3487             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3488              *     C = Y - 16; D = U - 128; E = V - 128;
3489              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3490              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3491              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3492              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3493              * U and V are shared between the pixels. */
3494             if (!(x & 1)) /* For every even pixel, read new U and V. */
3495             {
3496                 d = (int) src_line[1] - 128;
3497                 e = (int) src_line[3] - 128;
3498                 r2 = 409 * e + 128;
3499                 g2 = - 100 * d - 208 * e + 128;
3500                 b2 = 516 * d + 128;
3501             }
3502             c2 = 298 * ((int) src_line[0] - 16);
3503             dst_line[x] = 0xff000000
3504                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3505                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3506                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3507                 /* Scale RGB values to 0..255 range,
3508                  * then clip them if still not in range (may be negative),
3509                  * then shift them within DWORD if necessary. */
3510             src_line += 2;
3511         }
3512     }
3513 }
3514
3515 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3516         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3517 {
3518     unsigned int x, y;
3519     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3520
3521     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3522
3523     for (y = 0; y < h; ++y)
3524     {
3525         const BYTE *src_line = src + y * pitch_in;
3526         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3527         for (x = 0; x < w; ++x)
3528         {
3529             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3530              *     C = Y - 16; D = U - 128; E = V - 128;
3531              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3532              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3533              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3534              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3535              * U and V are shared between the pixels. */
3536             if (!(x & 1)) /* For every even pixel, read new U and V. */
3537             {
3538                 d = (int) src_line[1] - 128;
3539                 e = (int) src_line[3] - 128;
3540                 r2 = 409 * e + 128;
3541                 g2 = - 100 * d - 208 * e + 128;
3542                 b2 = 516 * d + 128;
3543             }
3544             c2 = 298 * ((int) src_line[0] - 16);
3545             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3546                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3547                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3548                 /* Scale RGB values to 0..255 range,
3549                  * then clip them if still not in range (may be negative),
3550                  * then shift them within DWORD if necessary. */
3551             src_line += 2;
3552         }
3553     }
3554 }
3555
3556 struct d3dfmt_convertor_desc
3557 {
3558     enum wined3d_format_id from, to;
3559     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3560 };
3561
/* Table of the supported CPU format conversions. Each entry lists the source
 * format, the destination format and the routine that performs the
 * conversion. convert_a8r8g8b8_x8r8g8b8 appears twice because it serves both
 * directions between B8G8R8A8 and B8G8R8X8. */
static const struct d3dfmt_convertor_desc convertors[] =
{
    {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
    {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
    {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
};
3571
3572 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3573         enum wined3d_format_id to)
3574 {
3575     unsigned int i;
3576
3577     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3578     {
3579         if (convertors[i].from == from && convertors[i].to == to)
3580             return &convertors[i];
3581     }
3582
3583     return NULL;
3584 }
3585
3586 /*****************************************************************************
3587  * surface_convert_format
3588  *
3589  * Creates a duplicate of a surface in a different format. Is used by Blt to
3590  * blit between surfaces with different formats.
3591  *
3592  * Parameters
3593  *  source: Source surface
3594  *  fmt: Requested destination format
3595  *
3596  *****************************************************************************/
3597 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3598 {
3599     struct wined3d_mapped_rect src_map, dst_map;
3600     const struct d3dfmt_convertor_desc *conv;
3601     struct wined3d_surface *ret = NULL;
3602     HRESULT hr;
3603
3604     conv = find_convertor(source->resource.format->id, to_fmt);
3605     if (!conv)
3606     {
3607         FIXME("Cannot find a conversion function from format %s to %s.\n",
3608                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3609         return NULL;
3610     }
3611
3612     wined3d_surface_create(source->resource.device, source->resource.width,
3613             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3D_POOL_SCRATCH,
3614             WINED3D_MULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3615             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3616             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3617     if (!ret)
3618     {
3619         ERR("Failed to create a destination surface for conversion.\n");
3620         return NULL;
3621     }
3622
3623     memset(&src_map, 0, sizeof(src_map));
3624     memset(&dst_map, 0, sizeof(dst_map));
3625
3626     hr = wined3d_surface_map(source, &src_map, NULL, WINED3DLOCK_READONLY);
3627     if (FAILED(hr))
3628     {
3629         ERR("Failed to lock the source surface.\n");
3630         wined3d_surface_decref(ret);
3631         return NULL;
3632     }
3633     hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3DLOCK_READONLY);
3634     if (FAILED(hr))
3635     {
3636         ERR("Failed to lock the destination surface.\n");
3637         wined3d_surface_unmap(source);
3638         wined3d_surface_decref(ret);
3639         return NULL;
3640     }
3641
3642     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3643             source->resource.width, source->resource.height);
3644
3645     wined3d_surface_unmap(ret);
3646     wined3d_surface_unmap(source);
3647
3648     return ret;
3649 }
3650
3651 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3652         unsigned int bpp, UINT pitch, DWORD color)
3653 {
3654     BYTE *first;
3655     int x, y;
3656
3657     /* Do first row */
3658
3659 #define COLORFILL_ROW(type) \
3660 do { \
3661     type *d = (type *)buf; \
3662     for (x = 0; x < width; ++x) \
3663         d[x] = (type)color; \
3664 } while(0)
3665
3666     switch (bpp)
3667     {
3668         case 1:
3669             COLORFILL_ROW(BYTE);
3670             break;
3671
3672         case 2:
3673             COLORFILL_ROW(WORD);
3674             break;
3675
3676         case 3:
3677         {
3678             BYTE *d = buf;
3679             for (x = 0; x < width; ++x, d += 3)
3680             {
3681                 d[0] = (color      ) & 0xFF;
3682                 d[1] = (color >>  8) & 0xFF;
3683                 d[2] = (color >> 16) & 0xFF;
3684             }
3685             break;
3686         }
3687         case 4:
3688             COLORFILL_ROW(DWORD);
3689             break;
3690
3691         default:
3692             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3693             return WINED3DERR_NOTAVAILABLE;
3694     }
3695
3696 #undef COLORFILL_ROW
3697
3698     /* Now copy first row. */
3699     first = buf;
3700     for (y = 1; y < height; ++y)
3701     {
3702         buf += pitch;
3703         memcpy(buf, first, width * bpp);
3704     }
3705
3706     return WINED3D_OK;
3707 }
3708
3709 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3710 {
3711     TRACE("surface %p.\n", surface);
3712
3713     if (!(surface->flags & SFLAG_LOCKED))
3714     {
3715         WARN("Trying to unmap unmapped surface.\n");
3716         return WINEDDERR_NOTLOCKED;
3717     }
3718     surface->flags &= ~SFLAG_LOCKED;
3719
3720     surface->surface_ops->surface_unmap(surface);
3721
3722     return WINED3D_OK;
3723 }
3724
3725 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3726         struct wined3d_mapped_rect *mapped_rect, const RECT *rect, DWORD flags)
3727 {
3728     const struct wined3d_format *format = surface->resource.format;
3729
3730     TRACE("surface %p, mapped_rect %p, rect %s, flags %#x.\n",
3731             surface, mapped_rect, wine_dbgstr_rect(rect), flags);
3732
3733     if (surface->flags & SFLAG_LOCKED)
3734     {
3735         WARN("Surface is already mapped.\n");
3736         return WINED3DERR_INVALIDCALL;
3737     }
3738     if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
3739             && rect && (rect->left || rect->top
3740             || rect->right != surface->resource.width
3741             || rect->bottom != surface->resource.height))
3742     {
3743         UINT width_mask = format->block_width - 1;
3744         UINT height_mask = format->block_height - 1;
3745
3746         if ((rect->left & width_mask) || (rect->right & width_mask)
3747                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3748         {
3749             WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3750                     wine_dbgstr_rect(rect), format->block_width, format->block_height);
3751
3752             if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3753                 return WINED3DERR_INVALIDCALL;
3754         }
3755     }
3756
3757     surface->flags |= SFLAG_LOCKED;
3758
3759     if (!(surface->flags & SFLAG_LOCKABLE))
3760         WARN("Trying to lock unlockable surface.\n");
3761
3762     /* Performance optimization: Count how often a surface is mapped, if it is
3763      * mapped regularly do not throw away the system memory copy. This avoids
3764      * the need to download the surface from OpenGL all the time. The surface
3765      * is still downloaded if the OpenGL texture is changed. */
3766     if (!(surface->flags & SFLAG_DYNLOCK))
3767     {
3768         if (++surface->lockCount > MAXLOCKCOUNT)
3769         {
3770             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3771             surface->flags |= SFLAG_DYNLOCK;
3772         }
3773     }
3774
3775     surface->surface_ops->surface_map(surface, rect, flags);
3776
3777     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3778         mapped_rect->row_pitch = surface->resource.width * format->byte_count;
3779     else
3780         mapped_rect->row_pitch = wined3d_surface_get_pitch(surface);
3781
3782     if (!rect)
3783     {
3784         mapped_rect->data = surface->resource.allocatedMemory;
3785         surface->lockedRect.left = 0;
3786         surface->lockedRect.top = 0;
3787         surface->lockedRect.right = surface->resource.width;
3788         surface->lockedRect.bottom = surface->resource.height;
3789     }
3790     else
3791     {
3792         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3793         {
3794             /* Compressed textures are block based, so calculate the offset of
3795              * the block that contains the top-left pixel of the locked rectangle. */
3796             mapped_rect->data = surface->resource.allocatedMemory
3797                     + ((rect->top / format->block_height) * mapped_rect->row_pitch)
3798                     + ((rect->left / format->block_width) * format->block_byte_count);
3799         }
3800         else
3801         {
3802             mapped_rect->data = surface->resource.allocatedMemory
3803                     + (mapped_rect->row_pitch * rect->top)
3804                     + (rect->left * format->byte_count);
3805         }
3806         surface->lockedRect.left = rect->left;
3807         surface->lockedRect.top = rect->top;
3808         surface->lockedRect.right = rect->right;
3809         surface->lockedRect.bottom = rect->bottom;
3810     }
3811
3812     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3813     TRACE("Returning memory %p, pitch %u.\n", mapped_rect->data, mapped_rect->row_pitch);
3814
3815     return WINED3D_OK;
3816 }
3817
3818 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3819 {
3820     struct wined3d_mapped_rect map;
3821     HRESULT hr;
3822
3823     TRACE("surface %p, dc %p.\n", surface, dc);
3824
3825     if (surface->flags & SFLAG_USERPTR)
3826     {
3827         ERR("Not supported on surfaces with application-provided memory.\n");
3828         return WINEDDERR_NODC;
3829     }
3830
3831     /* Give more detailed info for ddraw. */
3832     if (surface->flags & SFLAG_DCINUSE)
3833         return WINEDDERR_DCALREADYCREATED;
3834
3835     /* Can't GetDC if the surface is locked. */
3836     if (surface->flags & SFLAG_LOCKED)
3837         return WINED3DERR_INVALIDCALL;
3838
3839     /* Create a DIB section if there isn't a dc yet. */
3840     if (!surface->hDC)
3841     {
3842         if (surface->flags & SFLAG_CLIENT)
3843         {
3844             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3845             surface_release_client_storage(surface);
3846         }
3847         hr = surface_create_dib_section(surface);
3848         if (FAILED(hr))
3849             return WINED3DERR_INVALIDCALL;
3850
3851         /* Use the DIB section from now on if we are not using a PBO. */
3852         if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
3853         {
3854             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3855             surface->resource.heapMemory = NULL;
3856             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3857         }
3858     }
3859
3860     /* Map the surface. */
3861     hr = wined3d_surface_map(surface, &map, NULL, 0);
3862     if (FAILED(hr))
3863     {
3864         ERR("Map failed, hr %#x.\n", hr);
3865         return hr;
3866     }
3867
3868     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3869      * activates the allocatedMemory. */
3870     if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
3871         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3872
3873     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3874             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3875     {
3876         /* GetDC on palettized formats is unsupported in D3D9, and the method
3877          * is missing in D3D8, so this should only be used for DX <=7
3878          * surfaces (with non-device palettes). */
3879         const PALETTEENTRY *pal = NULL;
3880
3881         if (surface->palette)
3882         {
3883             pal = surface->palette->palents;
3884         }
3885         else
3886         {
3887             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3888             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3889
3890             if (dds_primary && dds_primary->palette)
3891                 pal = dds_primary->palette->palents;
3892         }
3893
3894         if (pal)
3895         {
3896             RGBQUAD col[256];
3897             unsigned int i;
3898
3899             for (i = 0; i < 256; ++i)
3900             {
3901                 col[i].rgbRed = pal[i].peRed;
3902                 col[i].rgbGreen = pal[i].peGreen;
3903                 col[i].rgbBlue = pal[i].peBlue;
3904                 col[i].rgbReserved = 0;
3905             }
3906             SetDIBColorTable(surface->hDC, 0, 256, col);
3907         }
3908     }
3909
3910     surface->flags |= SFLAG_DCINUSE;
3911
3912     *dc = surface->hDC;
3913     TRACE("Returning dc %p.\n", *dc);
3914
3915     return WINED3D_OK;
3916 }
3917
3918 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3919 {
3920     TRACE("surface %p, dc %p.\n", surface, dc);
3921
3922     if (!(surface->flags & SFLAG_DCINUSE))
3923         return WINEDDERR_NODC;
3924
3925     if (surface->hDC != dc)
3926     {
3927         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3928                 dc, surface->hDC);
3929         return WINEDDERR_NODC;
3930     }
3931
3932     /* Copy the contents of the DIB over to the PBO. */
3933     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
3934         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3935
3936     /* We locked first, so unlock now. */
3937     wined3d_surface_unmap(surface);
3938
3939     surface->flags &= ~SFLAG_DCINUSE;
3940
3941     return WINED3D_OK;
3942 }
3943
3944 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3945 {
3946     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3947
3948     if (flags)
3949     {
3950         static UINT once;
3951         if (!once++)
3952             FIXME("Ignoring flags %#x.\n", flags);
3953         else
3954             WARN("Ignoring flags %#x.\n", flags);
3955     }
3956
3957     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3958     {
3959         ERR("Not supported on swapchain surfaces.\n");
3960         return WINEDDERR_NOTFLIPPABLE;
3961     }
3962
3963     /* Flipping is only supported on render targets and overlays. */
3964     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3965     {
3966         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3967         return WINEDDERR_NOTFLIPPABLE;
3968     }
3969
3970     flip_surface(surface, override);
3971
3972     /* Update overlays if they're visible. */
3973     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3974         return surface_draw_overlay(surface);
3975
3976     return WINED3D_OK;
3977 }
3978
3979 /* Do not call while under the GL lock. */
3980 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3981 {
3982     struct wined3d_device *device = surface->resource.device;
3983
3984     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3985
3986     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3987     {
3988         struct wined3d_texture *texture = surface->container.u.texture;
3989
3990         TRACE("Passing to container (%p).\n", texture);
3991         texture->texture_ops->texture_preload(texture, srgb);
3992     }
3993     else
3994     {
3995         struct wined3d_context *context;
3996
3997         TRACE("(%p) : About to load surface\n", surface);
3998
3999         /* TODO: Use already acquired context when possible. */
4000         context = context_acquire(device, NULL);
4001
4002         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
4003
4004         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
4005         {
4006             /* Tell opengl to try and keep this texture in video ram (well mostly) */
4007             GLclampf tmp;
4008             tmp = 0.9f;
4009             ENTER_GL();
4010             glPrioritizeTextures(1, &surface->texture_name, &tmp);
4011             LEAVE_GL();
4012         }
4013
4014         context_release(context);
4015     }
4016 }
4017
4018 BOOL surface_init_sysmem(struct wined3d_surface *surface)
4019 {
4020     if (!surface->resource.allocatedMemory)
4021     {
4022         if (!surface->resource.heapMemory)
4023         {
4024             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
4025                     surface->resource.size + RESOURCE_ALIGNMENT)))
4026             {
4027                 ERR("Failed to allocate memory.\n");
4028                 return FALSE;
4029             }
4030         }
4031         else if (!(surface->flags & SFLAG_CLIENT))
4032         {
4033             ERR("Surface %p has heapMemory %p and flags %#x.\n",
4034                     surface, surface->resource.heapMemory, surface->flags);
4035         }
4036
4037         surface->resource.allocatedMemory =
4038             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
4039     }
4040     else
4041     {
4042         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
4043     }
4044
4045     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
4046
4047     return TRUE;
4048 }
4049
4050 /* Read the framebuffer back into the surface */
4051 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4052 {
4053     struct wined3d_device *device = surface->resource.device;
4054     const struct wined3d_gl_info *gl_info;
4055     struct wined3d_context *context;
4056     BYTE *mem;
4057     GLint fmt;
4058     GLint type;
4059     BYTE *row, *top, *bottom;
4060     int i;
4061     BOOL bpp;
4062     RECT local_rect;
4063     BOOL srcIsUpsideDown;
4064     GLint rowLen = 0;
4065     GLint skipPix = 0;
4066     GLint skipRow = 0;
4067
4068     context = context_acquire(device, surface);
4069     context_apply_blit_state(context, device);
4070     gl_info = context->gl_info;
4071
4072     ENTER_GL();
4073
4074     /* Select the correct read buffer, and give some debug output.
4075      * There is no need to keep track of the current read buffer or reset it, every part of the code
4076      * that reads sets the read buffer as desired.
4077      */
4078     if (surface_is_offscreen(surface))
4079     {
4080         /* Mapping the primary render target which is not on a swapchain.
4081          * Read from the back buffer. */
4082         TRACE("Mapping offscreen render target.\n");
4083         glReadBuffer(device->offscreenBuffer);
4084         srcIsUpsideDown = TRUE;
4085     }
4086     else
4087     {
4088         /* Onscreen surfaces are always part of a swapchain */
4089         GLenum buffer = surface_get_gl_buffer(surface);
4090         TRACE("Mapping %#x buffer.\n", buffer);
4091         glReadBuffer(buffer);
4092         checkGLcall("glReadBuffer");
4093         srcIsUpsideDown = FALSE;
4094     }
4095
4096     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4097     if (!rect)
4098     {
4099         local_rect.left = 0;
4100         local_rect.top = 0;
4101         local_rect.right = surface->resource.width;
4102         local_rect.bottom = surface->resource.height;
4103     }
4104     else
4105     {
4106         local_rect = *rect;
4107     }
4108     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4109
4110     switch (surface->resource.format->id)
4111     {
4112         case WINED3DFMT_P8_UINT:
4113         {
4114             if (primary_render_target_is_p8(device))
4115             {
4116                 /* In case of P8 render targets the index is stored in the alpha component */
4117                 fmt = GL_ALPHA;
4118                 type = GL_UNSIGNED_BYTE;
4119                 mem = dest;
4120                 bpp = surface->resource.format->byte_count;
4121             }
4122             else
4123             {
4124                 /* GL can't return palettized data, so read ARGB pixels into a
4125                  * separate block of memory and convert them into palettized format
4126                  * in software. Slow, but if the app means to use palettized render
4127                  * targets and locks it...
4128                  *
4129                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4130                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4131                  * for the color channels when palettizing the colors.
4132                  */
4133                 fmt = GL_RGB;
4134                 type = GL_UNSIGNED_BYTE;
4135                 pitch *= 3;
4136                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4137                 if (!mem)
4138                 {
4139                     ERR("Out of memory\n");
4140                     LEAVE_GL();
4141                     return;
4142                 }
4143                 bpp = surface->resource.format->byte_count * 3;
4144             }
4145         }
4146         break;
4147
4148         default:
4149             mem = dest;
4150             fmt = surface->resource.format->glFormat;
4151             type = surface->resource.format->glType;
4152             bpp = surface->resource.format->byte_count;
4153     }
4154
4155     if (surface->flags & SFLAG_PBO)
4156     {
4157         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4158         checkGLcall("glBindBufferARB");
4159         if (mem)
4160         {
4161             ERR("mem not null for pbo -- unexpected\n");
4162             mem = NULL;
4163         }
4164     }
4165
4166     /* Save old pixel store pack state */
4167     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4168     checkGLcall("glGetIntegerv");
4169     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4170     checkGLcall("glGetIntegerv");
4171     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4172     checkGLcall("glGetIntegerv");
4173
4174     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4175     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4176     checkGLcall("glPixelStorei");
4177     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4178     checkGLcall("glPixelStorei");
4179     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4180     checkGLcall("glPixelStorei");
4181
4182     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4183             local_rect.right - local_rect.left,
4184             local_rect.bottom - local_rect.top,
4185             fmt, type, mem);
4186     checkGLcall("glReadPixels");
4187
4188     /* Reset previous pixel store pack state */
4189     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4190     checkGLcall("glPixelStorei");
4191     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4192     checkGLcall("glPixelStorei");
4193     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4194     checkGLcall("glPixelStorei");
4195
4196     if (surface->flags & SFLAG_PBO)
4197     {
4198         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4199         checkGLcall("glBindBufferARB");
4200
4201         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4202          * to get a pointer to it and perform the flipping in software. This is a lot
4203          * faster than calling glReadPixels for each line. In case we want more speed
4204          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4205         if (!srcIsUpsideDown)
4206         {
4207             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4208             checkGLcall("glBindBufferARB");
4209
4210             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4211             checkGLcall("glMapBufferARB");
4212         }
4213     }
4214
4215     /* TODO: Merge this with the palettization loop below for P8 targets */
4216     if(!srcIsUpsideDown) {
4217         UINT len, off;
4218         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4219             Flip the lines in software */
4220         len = (local_rect.right - local_rect.left) * bpp;
4221         off = local_rect.left * bpp;
4222
4223         row = HeapAlloc(GetProcessHeap(), 0, len);
4224         if(!row) {
4225             ERR("Out of memory\n");
4226             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4227                 HeapFree(GetProcessHeap(), 0, mem);
4228             LEAVE_GL();
4229             return;
4230         }
4231
4232         top = mem + pitch * local_rect.top;
4233         bottom = mem + pitch * (local_rect.bottom - 1);
4234         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4235             memcpy(row, top + off, len);
4236             memcpy(top + off, bottom + off, len);
4237             memcpy(bottom + off, row, len);
4238             top += pitch;
4239             bottom -= pitch;
4240         }
4241         HeapFree(GetProcessHeap(), 0, row);
4242
4243         /* Unmap the temp PBO buffer */
4244         if (surface->flags & SFLAG_PBO)
4245         {
4246             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4247             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4248         }
4249     }
4250
4251     LEAVE_GL();
4252     context_release(context);
4253
4254     /* For P8 textures we need to perform an inverse palette lookup. This is
4255      * done by searching for a palette index which matches the RGB value.
4256      * Note this isn't guaranteed to work when there are multiple entries for
4257      * the same color but we have no choice. In case of P8 render targets,
4258      * the index is stored in the alpha component so no conversion is needed. */
4259     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4260     {
4261         const PALETTEENTRY *pal = NULL;
4262         DWORD width = pitch / 3;
4263         int x, y, c;
4264
4265         if (surface->palette)
4266         {
4267             pal = surface->palette->palents;
4268         }
4269         else
4270         {
4271             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4272             HeapFree(GetProcessHeap(), 0, mem);
4273             return;
4274         }
4275
4276         for(y = local_rect.top; y < local_rect.bottom; y++) {
4277             for(x = local_rect.left; x < local_rect.right; x++) {
4278                 /*                      start              lines            pixels      */
4279                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4280                 const BYTE *green = blue  + 1;
4281                 const BYTE *red = green + 1;
4282
4283                 for(c = 0; c < 256; c++) {
4284                     if(*red   == pal[c].peRed   &&
4285                        *green == pal[c].peGreen &&
4286                        *blue  == pal[c].peBlue)
4287                     {
4288                         *((BYTE *) dest + y * width + x) = c;
4289                         break;
4290                     }
4291                 }
4292             }
4293         }
4294         HeapFree(GetProcessHeap(), 0, mem);
4295     }
4296 }
4297
4298 /* Read the framebuffer contents into a texture. Note that this function
4299  * doesn't do any kind of flipping. Using this on an onscreen surface will
4300  * result in a flipped D3D texture. */
4301 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4302 {
4303     struct wined3d_device *device = surface->resource.device;
4304     struct wined3d_context *context;
4305
4306     context = context_acquire(device, surface);
4307     device_invalidate_state(device, STATE_FRAMEBUFFER);
4308
4309     surface_prepare_texture(surface, context, srgb);
4310     surface_bind_and_dirtify(surface, context, srgb);
4311
4312     TRACE("Reading back offscreen render target %p.\n", surface);
4313
4314     ENTER_GL();
4315
4316     if (surface_is_offscreen(surface))
4317         glReadBuffer(device->offscreenBuffer);
4318     else
4319         glReadBuffer(surface_get_gl_buffer(surface));
4320     checkGLcall("glReadBuffer");
4321
4322     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4323             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4324     checkGLcall("glCopyTexSubImage2D");
4325
4326     LEAVE_GL();
4327
4328     context_release(context);
4329 }
4330
4331 /* Context activation is done by the caller. */
4332 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4333         struct wined3d_context *context, BOOL srgb)
4334 {
4335     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4336     CONVERT_TYPES convert;
4337     struct wined3d_format format;
4338
4339     if (surface->flags & alloc_flag) return;
4340
4341     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4342     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4343     else surface->flags &= ~SFLAG_CONVERTED;
4344
4345     surface_bind_and_dirtify(surface, context, srgb);
4346     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4347     surface->flags |= alloc_flag;
4348 }
4349
4350 /* Context activation is done by the caller. */
4351 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4352 {
4353     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4354     {
4355         struct wined3d_texture *texture = surface->container.u.texture;
4356         UINT sub_count = texture->level_count * texture->layer_count;
4357         UINT i;
4358
4359         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4360
4361         for (i = 0; i < sub_count; ++i)
4362         {
4363             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4364             surface_prepare_texture_internal(s, context, srgb);
4365         }
4366
4367         return;
4368     }
4369
4370     surface_prepare_texture_internal(surface, context, srgb);
4371 }
4372
4373 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4374 {
4375     if (multisample)
4376     {
4377         if (surface->rb_multisample)
4378             return;
4379
4380         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4381         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4382         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4383                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4384         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4385     }
4386     else
4387     {
4388         if (surface->rb_resolved)
4389             return;
4390
4391         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4392         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4393         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4394                 surface->pow2Width, surface->pow2Height);
4395         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4396     }
4397 }
4398
4399 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4400         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4401 {
4402     struct wined3d_device *device = surface->resource.device;
4403     UINT pitch = wined3d_surface_get_pitch(surface);
4404     const struct wined3d_gl_info *gl_info;
4405     struct wined3d_context *context;
4406     RECT local_rect;
4407     UINT w, h;
4408
4409     surface_get_rect(surface, rect, &local_rect);
4410
4411     mem += local_rect.top * pitch + local_rect.left * bpp;
4412     w = local_rect.right - local_rect.left;
4413     h = local_rect.bottom - local_rect.top;
4414
4415     /* Activate the correct context for the render target */
4416     context = context_acquire(device, surface);
4417     context_apply_blit_state(context, device);
4418     gl_info = context->gl_info;
4419
4420     ENTER_GL();
4421
4422     if (!surface_is_offscreen(surface))
4423     {
4424         GLenum buffer = surface_get_gl_buffer(surface);
4425         TRACE("Unlocking %#x buffer.\n", buffer);
4426         context_set_draw_buffer(context, buffer);
4427
4428         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4429         glPixelZoom(1.0f, -1.0f);
4430     }
4431     else
4432     {
4433         /* Primary offscreen render target */
4434         TRACE("Offscreen render target.\n");
4435         context_set_draw_buffer(context, device->offscreenBuffer);
4436
4437         glPixelZoom(1.0f, 1.0f);
4438     }
4439
4440     glRasterPos3i(local_rect.left, local_rect.top, 1);
4441     checkGLcall("glRasterPos3i");
4442
4443     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4444     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4445
4446     if (surface->flags & SFLAG_PBO)
4447     {
4448         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4449         checkGLcall("glBindBufferARB");
4450     }
4451
4452     glDrawPixels(w, h, fmt, type, mem);
4453     checkGLcall("glDrawPixels");
4454
4455     if (surface->flags & SFLAG_PBO)
4456     {
4457         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4458         checkGLcall("glBindBufferARB");
4459     }
4460
4461     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4462     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4463
4464     LEAVE_GL();
4465
4466     if (wined3d_settings.strict_draw_ordering
4467             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4468             && surface->container.u.swapchain->front_buffer == surface))
4469         wglFlush();
4470
4471     context_release(context);
4472 }
4473
4474 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4475         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4476 {
4477     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4478     const struct wined3d_device *device = surface->resource.device;
4479     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4480     BOOL blit_supported = FALSE;
4481
4482     /* Copy the default values from the surface. Below we might perform fixups */
4483     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4484     *format = *surface->resource.format;
4485     *convert = NO_CONVERSION;
4486
4487     /* Ok, now look if we have to do any conversion */
4488     switch (surface->resource.format->id)
4489     {
4490         case WINED3DFMT_P8_UINT:
4491             /* Below the call to blit_supported is disabled for Wine 1.2
4492              * because the function isn't operating correctly yet. At the
4493              * moment 8-bit blits are handled in software and if certain GL
4494              * extensions are around, surface conversion is performed at
4495              * upload time. The blit_supported call recognizes it as a
4496              * destination fixup. This type of upload 'fixup' and 8-bit to
4497              * 8-bit blits need to be handled by the blit_shader.
4498              * TODO: get rid of this #if 0. */
4499 #if 0
4500             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4501                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4502                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4503 #endif
4504             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4505
4506             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4507              * texturing. Further also use conversion in case of color keying.
4508              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4509              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4510              * conflicts with this.
4511              */
4512             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4513                     || colorkey_active || !use_texturing)
4514             {
4515                 format->glFormat = GL_RGBA;
4516                 format->glInternal = GL_RGBA;
4517                 format->glType = GL_UNSIGNED_BYTE;
4518                 format->conv_byte_count = 4;
4519                 if (colorkey_active)
4520                     *convert = CONVERT_PALETTED_CK;
4521                 else
4522                     *convert = CONVERT_PALETTED;
4523             }
4524             break;
4525
4526         case WINED3DFMT_B2G3R3_UNORM:
4527             /* **********************
4528                 GL_UNSIGNED_BYTE_3_3_2
4529                 ********************** */
4530             if (colorkey_active) {
4531                 /* This texture format will never be used.. So do not care about color keying
4532                     up until the point in time it will be needed :-) */
4533                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4534             }
4535             break;
4536
4537         case WINED3DFMT_B5G6R5_UNORM:
4538             if (colorkey_active)
4539             {
4540                 *convert = CONVERT_CK_565;
4541                 format->glFormat = GL_RGBA;
4542                 format->glInternal = GL_RGB5_A1;
4543                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4544                 format->conv_byte_count = 2;
4545             }
4546             break;
4547
4548         case WINED3DFMT_B5G5R5X1_UNORM:
4549             if (colorkey_active)
4550             {
4551                 *convert = CONVERT_CK_5551;
4552                 format->glFormat = GL_BGRA;
4553                 format->glInternal = GL_RGB5_A1;
4554                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4555                 format->conv_byte_count = 2;
4556             }
4557             break;
4558
4559         case WINED3DFMT_B8G8R8_UNORM:
4560             if (colorkey_active)
4561             {
4562                 *convert = CONVERT_CK_RGB24;
4563                 format->glFormat = GL_RGBA;
4564                 format->glInternal = GL_RGBA8;
4565                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4566                 format->conv_byte_count = 4;
4567             }
4568             break;
4569
4570         case WINED3DFMT_B8G8R8X8_UNORM:
4571             if (colorkey_active)
4572             {
4573                 *convert = CONVERT_RGB32_888;
4574                 format->glFormat = GL_RGBA;
4575                 format->glInternal = GL_RGBA8;
4576                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4577                 format->conv_byte_count = 4;
4578             }
4579             break;
4580
4581         default:
4582             break;
4583     }
4584
4585     if (*convert != NO_CONVERSION)
4586     {
4587         format->rtInternal = format->glInternal;
4588         format->glGammaInternal = format->glInternal;
4589     }
4590
4591     return WINED3D_OK;
4592 }
4593
4594 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4595 {
4596     /* FIXME: Is this really how color keys are supposed to work? I think it
4597      * makes more sense to compare the individual channels. */
4598     return color >= color_key->color_space_low_value
4599             && color <= color_key->color_space_high_value;
4600 }
4601
4602 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4603 {
4604     const struct wined3d_device *device = surface->resource.device;
4605     const struct wined3d_palette *pal = surface->palette;
4606     BOOL index_in_alpha = FALSE;
4607     unsigned int i;
4608
4609     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4610      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4611      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4612      * duplicate entries. Store the color key in the unused alpha component to speed the
4613      * download up and to make conversion unneeded. */
4614     index_in_alpha = primary_render_target_is_p8(device);
4615
4616     if (!pal)
4617     {
4618         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4619         if (index_in_alpha)
4620         {
4621             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4622              * there's no palette at this time. */
4623             for (i = 0; i < 256; i++) table[i][3] = i;
4624         }
4625     }
4626     else
4627     {
4628         TRACE("Using surface palette %p\n", pal);
4629         /* Get the surface's palette */
4630         for (i = 0; i < 256; ++i)
4631         {
4632             table[i][0] = pal->palents[i].peRed;
4633             table[i][1] = pal->palents[i].peGreen;
4634             table[i][2] = pal->palents[i].peBlue;
4635
4636             /* When index_in_alpha is set the palette index is stored in the
4637              * alpha component. In case of a readback we can then read
4638              * GL_ALPHA. Color keying is handled in BltOverride using a
4639              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4640              * color key itself is passed to glAlphaFunc in other cases the
4641              * alpha component of pixels that should be masked away is set to 0. */
4642             if (index_in_alpha)
4643                 table[i][3] = i;
4644             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4645                 table[i][3] = 0x00;
4646             else if (pal->flags & WINEDDPCAPS_ALPHA)
4647                 table[i][3] = pal->palents[i].peFlags;
4648             else
4649                 table[i][3] = 0xFF;
4650         }
4651     }
4652 }
4653
4654 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4655         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4656 {
4657     const BYTE *source;
4658     BYTE *dest;
4659     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4660
4661     switch (convert) {
4662         case NO_CONVERSION:
4663         {
4664             memcpy(dst, src, pitch * height);
4665             break;
4666         }
4667         case CONVERT_PALETTED:
4668         case CONVERT_PALETTED_CK:
4669         {
4670             BYTE table[256][4];
4671             unsigned int x, y;
4672
4673             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4674
4675             for (y = 0; y < height; y++)
4676             {
4677                 source = src + pitch * y;
4678                 dest = dst + outpitch * y;
4679                 /* This is an 1 bpp format, using the width here is fine */
4680                 for (x = 0; x < width; x++) {
4681                     BYTE color = *source++;
4682                     *dest++ = table[color][0];
4683                     *dest++ = table[color][1];
4684                     *dest++ = table[color][2];
4685                     *dest++ = table[color][3];
4686                 }
4687             }
4688         }
4689         break;
4690
4691         case CONVERT_CK_565:
4692         {
4693             /* Converting the 565 format in 5551 packed to emulate color-keying.
4694
4695               Note : in all these conversion, it would be best to average the averaging
4696                       pixels to get the color of the pixel that will be color-keyed to
4697                       prevent 'color bleeding'. This will be done later on if ever it is
4698                       too visible.
4699
4700               Note2: Nvidia documents say that their driver does not support alpha + color keying
4701                      on the same surface and disables color keying in such a case
4702             */
4703             unsigned int x, y;
4704             const WORD *Source;
4705             WORD *Dest;
4706
4707             TRACE("Color keyed 565\n");
4708
4709             for (y = 0; y < height; y++) {
4710                 Source = (const WORD *)(src + y * pitch);
4711                 Dest = (WORD *) (dst + y * outpitch);
4712                 for (x = 0; x < width; x++ ) {
4713                     WORD color = *Source++;
4714                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4715                     if (!color_in_range(&surface->src_blt_color_key, color))
4716                         *Dest |= 0x0001;
4717                     Dest++;
4718                 }
4719             }
4720         }
4721         break;
4722
4723         case CONVERT_CK_5551:
4724         {
4725             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4726             unsigned int x, y;
4727             const WORD *Source;
4728             WORD *Dest;
4729             TRACE("Color keyed 5551\n");
4730             for (y = 0; y < height; y++) {
4731                 Source = (const WORD *)(src + y * pitch);
4732                 Dest = (WORD *) (dst + y * outpitch);
4733                 for (x = 0; x < width; x++ ) {
4734                     WORD color = *Source++;
4735                     *Dest = color;
4736                     if (!color_in_range(&surface->src_blt_color_key, color))
4737                         *Dest |= (1 << 15);
4738                     else
4739                         *Dest &= ~(1 << 15);
4740                     Dest++;
4741                 }
4742             }
4743         }
4744         break;
4745
4746         case CONVERT_CK_RGB24:
4747         {
4748             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4749             unsigned int x, y;
4750             for (y = 0; y < height; y++)
4751             {
4752                 source = src + pitch * y;
4753                 dest = dst + outpitch * y;
4754                 for (x = 0; x < width; x++) {
4755                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4756                     DWORD dstcolor = color << 8;
4757                     if (!color_in_range(&surface->src_blt_color_key, color))
4758                         dstcolor |= 0xff;
4759                     *(DWORD*)dest = dstcolor;
4760                     source += 3;
4761                     dest += 4;
4762                 }
4763             }
4764         }
4765         break;
4766
4767         case CONVERT_RGB32_888:
4768         {
4769             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4770             unsigned int x, y;
4771             for (y = 0; y < height; y++)
4772             {
4773                 source = src + pitch * y;
4774                 dest = dst + outpitch * y;
4775                 for (x = 0; x < width; x++) {
4776                     DWORD color = 0xffffff & *(const DWORD*)source;
4777                     DWORD dstcolor = color << 8;
4778                     if (!color_in_range(&surface->src_blt_color_key, color))
4779                         dstcolor |= 0xff;
4780                     *(DWORD*)dest = dstcolor;
4781                     source += 4;
4782                     dest += 4;
4783                 }
4784             }
4785         }
4786         break;
4787
4788         default:
4789             ERR("Unsupported conversion type %#x.\n", convert);
4790     }
4791     return WINED3D_OK;
4792 }
4793
4794 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4795 {
4796     /* Flip the surface contents */
4797     /* Flip the DC */
4798     {
4799         HDC tmp;
4800         tmp = front->hDC;
4801         front->hDC = back->hDC;
4802         back->hDC = tmp;
4803     }
4804
4805     /* Flip the DIBsection */
4806     {
4807         HBITMAP tmp = front->dib.DIBsection;
4808         front->dib.DIBsection = back->dib.DIBsection;
4809         back->dib.DIBsection = tmp;
4810     }
4811
4812     /* Flip the surface data */
4813     {
4814         void* tmp;
4815
4816         tmp = front->dib.bitmap_data;
4817         front->dib.bitmap_data = back->dib.bitmap_data;
4818         back->dib.bitmap_data = tmp;
4819
4820         tmp = front->resource.allocatedMemory;
4821         front->resource.allocatedMemory = back->resource.allocatedMemory;
4822         back->resource.allocatedMemory = tmp;
4823
4824         tmp = front->resource.heapMemory;
4825         front->resource.heapMemory = back->resource.heapMemory;
4826         back->resource.heapMemory = tmp;
4827     }
4828
4829     /* Flip the PBO */
4830     {
4831         GLuint tmp_pbo = front->pbo;
4832         front->pbo = back->pbo;
4833         back->pbo = tmp_pbo;
4834     }
4835
4836     /* Flip the opengl texture */
4837     {
4838         GLuint tmp;
4839
4840         tmp = back->texture_name;
4841         back->texture_name = front->texture_name;
4842         front->texture_name = tmp;
4843
4844         tmp = back->texture_name_srgb;
4845         back->texture_name_srgb = front->texture_name_srgb;
4846         front->texture_name_srgb = tmp;
4847
4848         tmp = back->rb_multisample;
4849         back->rb_multisample = front->rb_multisample;
4850         front->rb_multisample = tmp;
4851
4852         tmp = back->rb_resolved;
4853         back->rb_resolved = front->rb_resolved;
4854         front->rb_resolved = tmp;
4855
4856         resource_unload(&back->resource);
4857         resource_unload(&front->resource);
4858     }
4859
4860     {
4861         DWORD tmp_flags = back->flags;
4862         back->flags = front->flags;
4863         front->flags = tmp_flags;
4864     }
4865 }
4866
4867 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4868  * pixel copy calls. */
4869 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4870         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4871 {
4872     struct wined3d_device *device = dst_surface->resource.device;
4873     float xrel, yrel;
4874     UINT row;
4875     struct wined3d_context *context;
4876     BOOL upsidedown = FALSE;
4877     RECT dst_rect = *dst_rect_in;
4878
4879     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4880      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4881      */
4882     if(dst_rect.top > dst_rect.bottom) {
4883         UINT tmp = dst_rect.bottom;
4884         dst_rect.bottom = dst_rect.top;
4885         dst_rect.top = tmp;
4886         upsidedown = TRUE;
4887     }
4888
4889     context = context_acquire(device, src_surface);
4890     context_apply_blit_state(context, device);
4891     surface_internal_preload(dst_surface, SRGB_RGB);
4892     ENTER_GL();
4893
4894     /* Bind the target texture */
4895     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4896     if (surface_is_offscreen(src_surface))
4897     {
4898         TRACE("Reading from an offscreen target\n");
4899         upsidedown = !upsidedown;
4900         glReadBuffer(device->offscreenBuffer);
4901     }
4902     else
4903     {
4904         glReadBuffer(surface_get_gl_buffer(src_surface));
4905     }
4906     checkGLcall("glReadBuffer");
4907
4908     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4909     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4910
4911     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4912     {
4913         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4914
4915         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4916             ERR("Texture filtering not supported in direct blit.\n");
4917     }
4918     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4919             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4920     {
4921         ERR("Texture filtering not supported in direct blit\n");
4922     }
4923
4924     if (upsidedown
4925             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4926             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4927     {
4928         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4929
4930         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4931                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4932                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4933                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4934     }
4935     else
4936     {
4937         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4938         /* I have to process this row by row to swap the image,
4939          * otherwise it would be upside down, so stretching in y direction
4940          * doesn't cost extra time
4941          *
4942          * However, stretching in x direction can be avoided if not necessary
4943          */
4944         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4945             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4946             {
4947                 /* Well, that stuff works, but it's very slow.
4948                  * find a better way instead
4949                  */
4950                 UINT col;
4951
4952                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4953                 {
4954                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4955                             dst_rect.left + col /* x offset */, row /* y offset */,
4956                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4957                 }
4958             }
4959             else
4960             {
4961                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4962                         dst_rect.left /* x offset */, row /* y offset */,
4963                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4964             }
4965         }
4966     }
4967     checkGLcall("glCopyTexSubImage2D");
4968
4969     LEAVE_GL();
4970     context_release(context);
4971
4972     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4973      * path is never entered
4974      */
4975     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4976 }
4977
4978 /* Uses the hardware to stretch and flip the image */
4979 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4980         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4981 {
4982     struct wined3d_device *device = dst_surface->resource.device;
4983     struct wined3d_swapchain *src_swapchain = NULL;
4984     GLuint src, backup = 0;
4985     float left, right, top, bottom; /* Texture coordinates */
4986     UINT fbwidth = src_surface->resource.width;
4987     UINT fbheight = src_surface->resource.height;
4988     struct wined3d_context *context;
4989     GLenum drawBuffer = GL_BACK;
4990     GLenum texture_target;
4991     BOOL noBackBufferBackup;
4992     BOOL src_offscreen;
4993     BOOL upsidedown = FALSE;
4994     RECT dst_rect = *dst_rect_in;
4995
4996     TRACE("Using hwstretch blit\n");
4997     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4998     context = context_acquire(device, src_surface);
4999     context_apply_blit_state(context, device);
5000     surface_internal_preload(dst_surface, SRGB_RGB);
5001
5002     src_offscreen = surface_is_offscreen(src_surface);
5003     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
5004     if (!noBackBufferBackup && !src_surface->texture_name)
5005     {
5006         /* Get it a description */
5007         surface_internal_preload(src_surface, SRGB_RGB);
5008     }
5009     ENTER_GL();
5010
5011     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
5012      * This way we don't have to wait for the 2nd readback to finish to leave this function.
5013      */
5014     if (context->aux_buffers >= 2)
5015     {
5016         /* Got more than one aux buffer? Use the 2nd aux buffer */
5017         drawBuffer = GL_AUX1;
5018     }
5019     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
5020     {
5021         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5022         drawBuffer = GL_AUX0;
5023     }
5024
5025     if(noBackBufferBackup) {
5026         glGenTextures(1, &backup);
5027         checkGLcall("glGenTextures");
5028         context_bind_texture(context, GL_TEXTURE_2D, backup);
5029         texture_target = GL_TEXTURE_2D;
5030     } else {
5031         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5032          * we are reading from the back buffer, the backup can be used as source texture
5033          */
5034         texture_target = src_surface->texture_target;
5035         context_bind_texture(context, texture_target, src_surface->texture_name);
5036         glEnable(texture_target);
5037         checkGLcall("glEnable(texture_target)");
5038
5039         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5040         src_surface->flags &= ~SFLAG_INTEXTURE;
5041     }
5042
5043     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5044      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5045      */
5046     if(dst_rect.top > dst_rect.bottom) {
5047         UINT tmp = dst_rect.bottom;
5048         dst_rect.bottom = dst_rect.top;
5049         dst_rect.top = tmp;
5050         upsidedown = TRUE;
5051     }
5052
5053     if (src_offscreen)
5054     {
5055         TRACE("Reading from an offscreen target\n");
5056         upsidedown = !upsidedown;
5057         glReadBuffer(device->offscreenBuffer);
5058     }
5059     else
5060     {
5061         glReadBuffer(surface_get_gl_buffer(src_surface));
5062     }
5063
5064     /* TODO: Only back up the part that will be overwritten */
5065     glCopyTexSubImage2D(texture_target, 0,
5066                         0, 0 /* read offsets */,
5067                         0, 0,
5068                         fbwidth,
5069                         fbheight);
5070
5071     checkGLcall("glCopyTexSubImage2D");
5072
5073     /* No issue with overriding these - the sampler is dirty due to blit usage */
5074     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5075             wined3d_gl_mag_filter(magLookup, filter));
5076     checkGLcall("glTexParameteri");
5077     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5078             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5079     checkGLcall("glTexParameteri");
5080
5081     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5082         src_swapchain = src_surface->container.u.swapchain;
5083     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5084     {
5085         src = backup ? backup : src_surface->texture_name;
5086     }
5087     else
5088     {
5089         glReadBuffer(GL_FRONT);
5090         checkGLcall("glReadBuffer(GL_FRONT)");
5091
5092         glGenTextures(1, &src);
5093         checkGLcall("glGenTextures(1, &src)");
5094         context_bind_texture(context, GL_TEXTURE_2D, src);
5095
5096         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5097          * out for power of 2 sizes
5098          */
5099         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5100                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5101         checkGLcall("glTexImage2D");
5102         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5103                             0, 0 /* read offsets */,
5104                             0, 0,
5105                             fbwidth,
5106                             fbheight);
5107
5108         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5109         checkGLcall("glTexParameteri");
5110         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5111         checkGLcall("glTexParameteri");
5112
5113         glReadBuffer(GL_BACK);
5114         checkGLcall("glReadBuffer(GL_BACK)");
5115
5116         if(texture_target != GL_TEXTURE_2D) {
5117             glDisable(texture_target);
5118             glEnable(GL_TEXTURE_2D);
5119             texture_target = GL_TEXTURE_2D;
5120         }
5121     }
5122     checkGLcall("glEnd and previous");
5123
5124     left = src_rect->left;
5125     right = src_rect->right;
5126
5127     if (!upsidedown)
5128     {
5129         top = src_surface->resource.height - src_rect->top;
5130         bottom = src_surface->resource.height - src_rect->bottom;
5131     }
5132     else
5133     {
5134         top = src_surface->resource.height - src_rect->bottom;
5135         bottom = src_surface->resource.height - src_rect->top;
5136     }
5137
5138     if (src_surface->flags & SFLAG_NORMCOORD)
5139     {
5140         left /= src_surface->pow2Width;
5141         right /= src_surface->pow2Width;
5142         top /= src_surface->pow2Height;
5143         bottom /= src_surface->pow2Height;
5144     }
5145
5146     /* draw the source texture stretched and upside down. The correct surface is bound already */
5147     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5148     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5149
5150     context_set_draw_buffer(context, drawBuffer);
5151     glReadBuffer(drawBuffer);
5152
5153     glBegin(GL_QUADS);
5154         /* bottom left */
5155         glTexCoord2f(left, bottom);
5156         glVertex2i(0, 0);
5157
5158         /* top left */
5159         glTexCoord2f(left, top);
5160         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5161
5162         /* top right */
5163         glTexCoord2f(right, top);
5164         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5165
5166         /* bottom right */
5167         glTexCoord2f(right, bottom);
5168         glVertex2i(dst_rect.right - dst_rect.left, 0);
5169     glEnd();
5170     checkGLcall("glEnd and previous");
5171
5172     if (texture_target != dst_surface->texture_target)
5173     {
5174         glDisable(texture_target);
5175         glEnable(dst_surface->texture_target);
5176         texture_target = dst_surface->texture_target;
5177     }
5178
5179     /* Now read the stretched and upside down image into the destination texture */
5180     context_bind_texture(context, texture_target, dst_surface->texture_name);
5181     glCopyTexSubImage2D(texture_target,
5182                         0,
5183                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5184                         0, 0, /* We blitted the image to the origin */
5185                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5186     checkGLcall("glCopyTexSubImage2D");
5187
5188     if(drawBuffer == GL_BACK) {
5189         /* Write the back buffer backup back */
5190         if(backup) {
5191             if(texture_target != GL_TEXTURE_2D) {
5192                 glDisable(texture_target);
5193                 glEnable(GL_TEXTURE_2D);
5194                 texture_target = GL_TEXTURE_2D;
5195             }
5196             context_bind_texture(context, GL_TEXTURE_2D, backup);
5197         }
5198         else
5199         {
5200             if (texture_target != src_surface->texture_target)
5201             {
5202                 glDisable(texture_target);
5203                 glEnable(src_surface->texture_target);
5204                 texture_target = src_surface->texture_target;
5205             }
5206             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5207         }
5208
5209         glBegin(GL_QUADS);
5210             /* top left */
5211             glTexCoord2f(0.0f, 0.0f);
5212             glVertex2i(0, fbheight);
5213
5214             /* bottom left */
5215             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5216             glVertex2i(0, 0);
5217
5218             /* bottom right */
5219             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5220                     (float)fbheight / (float)src_surface->pow2Height);
5221             glVertex2i(fbwidth, 0);
5222
5223             /* top right */
5224             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5225             glVertex2i(fbwidth, fbheight);
5226         glEnd();
5227     }
5228     glDisable(texture_target);
5229     checkGLcall("glDisable(texture_target)");
5230
5231     /* Cleanup */
5232     if (src != src_surface->texture_name && src != backup)
5233     {
5234         glDeleteTextures(1, &src);
5235         checkGLcall("glDeleteTextures(1, &src)");
5236     }
5237     if(backup) {
5238         glDeleteTextures(1, &backup);
5239         checkGLcall("glDeleteTextures(1, &backup)");
5240     }
5241
5242     LEAVE_GL();
5243
5244     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5245
5246     context_release(context);
5247
5248     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5249      * path is never entered
5250      */
5251     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5252 }
5253
5254 /* Front buffer coordinates are always full screen coordinates, but our GL
5255  * drawable is limited to the window's client area. The sysmem and texture
5256  * copies do have the full screen size. Note that GL has a bottom-left
5257  * origin, while D3D has a top-left origin. */
5258 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5259 {
5260     UINT drawable_height;
5261
5262     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5263             && surface == surface->container.u.swapchain->front_buffer)
5264     {
5265         POINT offset = {0, 0};
5266         RECT windowsize;
5267
5268         ScreenToClient(window, &offset);
5269         OffsetRect(rect, offset.x, offset.y);
5270
5271         GetClientRect(window, &windowsize);
5272         drawable_height = windowsize.bottom - windowsize.top;
5273     }
5274     else
5275     {
5276         drawable_height = surface->resource.height;
5277     }
5278
5279     rect->top = drawable_height - rect->top;
5280     rect->bottom = drawable_height - rect->bottom;
5281 }
5282
5283 static void surface_blt_to_drawable(const struct wined3d_device *device,
5284         enum wined3d_texture_filter_type filter, BOOL color_key,
5285         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5286         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5287 {
5288     struct wined3d_context *context;
5289     RECT src_rect, dst_rect;
5290
5291     src_rect = *src_rect_in;
5292     dst_rect = *dst_rect_in;
5293
5294     /* Make sure the surface is up-to-date. This should probably use
5295      * surface_load_location() and worry about the destination surface too,
5296      * unless we're overwriting it completely. */
5297     surface_internal_preload(src_surface, SRGB_RGB);
5298
5299     /* Activate the destination context, set it up for blitting */
5300     context = context_acquire(device, dst_surface);
5301     context_apply_blit_state(context, device);
5302
5303     if (!surface_is_offscreen(dst_surface))
5304         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5305
5306     device->blitter->set_shader(device->blit_priv, context, src_surface);
5307
5308     ENTER_GL();
5309
5310     if (color_key)
5311     {
5312         glEnable(GL_ALPHA_TEST);
5313         checkGLcall("glEnable(GL_ALPHA_TEST)");
5314
5315         /* When the primary render target uses P8, the alpha component
5316          * contains the palette index. Which means that the colorkey is one of
5317          * the palette entries. In other cases pixels that should be masked
5318          * away have alpha set to 0. */
5319         if (primary_render_target_is_p8(device))
5320             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5321         else
5322             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5323         checkGLcall("glAlphaFunc");
5324     }
5325     else
5326     {
5327         glDisable(GL_ALPHA_TEST);
5328         checkGLcall("glDisable(GL_ALPHA_TEST)");
5329     }
5330
5331     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5332
5333     if (color_key)
5334     {
5335         glDisable(GL_ALPHA_TEST);
5336         checkGLcall("glDisable(GL_ALPHA_TEST)");
5337     }
5338
5339     LEAVE_GL();
5340
5341     /* Leave the opengl state valid for blitting */
5342     device->blitter->unset_shader(context->gl_info);
5343
5344     if (wined3d_settings.strict_draw_ordering
5345             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5346             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5347         wglFlush(); /* Flush to ensure ordering across contexts. */
5348
5349     context_release(context);
5350 }
5351
5352 /* Do not call while under the GL lock. */
5353 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5354 {
5355     struct wined3d_device *device = s->resource.device;
5356     const struct blit_shader *blitter;
5357
5358     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5359             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5360     if (!blitter)
5361     {
5362         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5363         return WINED3DERR_INVALIDCALL;
5364     }
5365
5366     return blitter->color_fill(device, s, rect, color);
5367 }
5368
5369 /* Do not call while under the GL lock. */
5370 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5371         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5372         enum wined3d_texture_filter_type filter)
5373 {
5374     struct wined3d_device *device = dst_surface->resource.device;
5375     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5376     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5377
5378     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5379             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5380             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5381
5382     /* Get the swapchain. One of the surfaces has to be a primary surface */
5383     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5384     {
5385         WARN("Destination is in sysmem, rejecting gl blt\n");
5386         return WINED3DERR_INVALIDCALL;
5387     }
5388
5389     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5390         dstSwapchain = dst_surface->container.u.swapchain;
5391
5392     if (src_surface)
5393     {
5394         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5395         {
5396             WARN("Src is in sysmem, rejecting gl blt\n");
5397             return WINED3DERR_INVALIDCALL;
5398         }
5399
5400         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5401             srcSwapchain = src_surface->container.u.swapchain;
5402     }
5403
5404     /* Early sort out of cases where no render target is used */
5405     if (!dstSwapchain && !srcSwapchain
5406             && src_surface != device->fb.render_targets[0]
5407             && dst_surface != device->fb.render_targets[0])
5408     {
5409         TRACE("No surface is render target, not using hardware blit.\n");
5410         return WINED3DERR_INVALIDCALL;
5411     }
5412
5413     /* No destination color keying supported */
5414     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5415     {
5416         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5417         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5418         return WINED3DERR_INVALIDCALL;
5419     }
5420
5421     if (dstSwapchain && dstSwapchain == srcSwapchain)
5422     {
5423         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5424         return WINED3DERR_INVALIDCALL;
5425     }
5426
5427     if (dstSwapchain && srcSwapchain)
5428     {
5429         FIXME("Implement hardware blit between two different swapchains\n");
5430         return WINED3DERR_INVALIDCALL;
5431     }
5432
5433     if (dstSwapchain)
5434     {
5435         /* Handled with regular texture -> swapchain blit */
5436         if (src_surface == device->fb.render_targets[0])
5437             TRACE("Blit from active render target to a swapchain\n");
5438     }
5439     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5440     {
5441         FIXME("Implement blit from a swapchain to the active render target\n");
5442         return WINED3DERR_INVALIDCALL;
5443     }
5444
5445     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5446     {
5447         /* Blit from render target to texture */
5448         BOOL stretchx;
5449
5450         /* P8 read back is not implemented */
5451         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5452                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5453         {
5454             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5455             return WINED3DERR_INVALIDCALL;
5456         }
5457
5458         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5459         {
5460             TRACE("Color keying not supported by frame buffer to texture blit\n");
5461             return WINED3DERR_INVALIDCALL;
5462             /* Destination color key is checked above */
5463         }
5464
5465         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5466             stretchx = TRUE;
5467         else
5468             stretchx = FALSE;
5469
5470         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5471          * flip the image nor scale it.
5472          *
5473          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5474          * -> If the app wants a image width an unscaled width, copy it line per line
5475          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5476          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5477          *    back buffer. This is slower than reading line per line, thus not used for flipping
5478          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5479          *    pixel by pixel. */
5480         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5481                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5482         {
5483             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5484             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5485         }
5486         else
5487         {
5488             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5489             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5490         }
5491
5492         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5493         {
5494             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5495             dst_surface->resource.allocatedMemory = NULL;
5496             dst_surface->resource.heapMemory = NULL;
5497         }
5498         else
5499         {
5500             dst_surface->flags &= ~SFLAG_INSYSMEM;
5501         }
5502
5503         return WINED3D_OK;
5504     }
5505     else if (src_surface)
5506     {
5507         /* Blit from offscreen surface to render target */
5508         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5509         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5510
5511         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5512
5513         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5514                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5515                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5516         {
5517             FIXME("Unsupported blit operation falling back to software\n");
5518             return WINED3DERR_INVALIDCALL;
5519         }
5520
5521         /* Color keying: Check if we have to do a color keyed blt,
5522          * and if not check if a color key is activated.
5523          *
5524          * Just modify the color keying parameters in the surface and restore them afterwards
5525          * The surface keeps track of the color key last used to load the opengl surface.
5526          * PreLoad will catch the change to the flags and color key and reload if necessary.
5527          */
5528         if (flags & WINEDDBLT_KEYSRC)
5529         {
5530             /* Use color key from surface */
5531         }
5532         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5533         {
5534             /* Use color key from DDBltFx */
5535             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5536             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5537         }
5538         else
5539         {
5540             /* Do not use color key */
5541             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5542         }
5543
5544         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5545                 src_surface, src_rect, dst_surface, dst_rect);
5546
5547         /* Restore the color key parameters */
5548         src_surface->CKeyFlags = oldCKeyFlags;
5549         src_surface->src_blt_color_key = old_blt_key;
5550
5551         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5552
5553         return WINED3D_OK;
5554     }
5555
5556     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5557     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5558     return WINED3DERR_INVALIDCALL;
5559 }
5560
5561 /* GL locking is done by the caller */
5562 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5563         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5564 {
5565     struct wined3d_device *device = surface->resource.device;
5566     const struct wined3d_gl_info *gl_info = context->gl_info;
5567     GLint compare_mode = GL_NONE;
5568     struct blt_info info;
5569     GLint old_binding = 0;
5570     RECT rect;
5571
5572     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5573
5574     glDisable(GL_CULL_FACE);
5575     glDisable(GL_BLEND);
5576     glDisable(GL_ALPHA_TEST);
5577     glDisable(GL_SCISSOR_TEST);
5578     glDisable(GL_STENCIL_TEST);
5579     glEnable(GL_DEPTH_TEST);
5580     glDepthFunc(GL_ALWAYS);
5581     glDepthMask(GL_TRUE);
5582     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5583     glViewport(x, y, w, h);
5584
5585     SetRect(&rect, 0, h, w, 0);
5586     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5587     context_active_texture(context, context->gl_info, 0);
5588     glGetIntegerv(info.binding, &old_binding);
5589     glBindTexture(info.bind_target, texture);
5590     if (gl_info->supported[ARB_SHADOW])
5591     {
5592         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5593         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5594     }
5595
5596     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5597             gl_info, info.tex_type, &surface->ds_current_size);
5598
5599     glBegin(GL_TRIANGLE_STRIP);
5600     glTexCoord3fv(info.coords[0]);
5601     glVertex2f(-1.0f, -1.0f);
5602     glTexCoord3fv(info.coords[1]);
5603     glVertex2f(1.0f, -1.0f);
5604     glTexCoord3fv(info.coords[2]);
5605     glVertex2f(-1.0f, 1.0f);
5606     glTexCoord3fv(info.coords[3]);
5607     glVertex2f(1.0f, 1.0f);
5608     glEnd();
5609
5610     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5611     glBindTexture(info.bind_target, old_binding);
5612
5613     glPopAttrib();
5614
5615     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5616 }
5617
5618 void surface_modify_ds_location(struct wined3d_surface *surface,
5619         DWORD location, UINT w, UINT h)
5620 {
5621     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5622
5623     if (location & ~(SFLAG_LOCATIONS | SFLAG_LOST))
5624         FIXME("Invalid location (%#x) specified.\n", location);
5625
5626     if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5627             || (!(surface->flags & SFLAG_INTEXTURE) && (location & SFLAG_INTEXTURE)))
5628     {
5629         if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5630         {
5631             TRACE("Passing to container.\n");
5632             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5633         }
5634     }
5635
5636     surface->ds_current_size.cx = w;
5637     surface->ds_current_size.cy = h;
5638     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_LOST);
5639     surface->flags |= location;
5640 }
5641
5642 /* Context activation is done by the caller. */
5643 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5644 {
5645     struct wined3d_device *device = surface->resource.device;
5646     GLsizei w, h;
5647
5648     TRACE("surface %p, new location %#x.\n", surface, location);
5649
5650     /* TODO: Make this work for modes other than FBO */
5651     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5652
5653     if (!(surface->flags & location))
5654     {
5655         w = surface->ds_current_size.cx;
5656         h = surface->ds_current_size.cy;
5657         surface->ds_current_size.cx = 0;
5658         surface->ds_current_size.cy = 0;
5659     }
5660     else
5661     {
5662         w = surface->resource.width;
5663         h = surface->resource.height;
5664     }
5665
5666     if (surface->ds_current_size.cx == surface->resource.width
5667             && surface->ds_current_size.cy == surface->resource.height)
5668     {
5669         TRACE("Location (%#x) is already up to date.\n", location);
5670         return;
5671     }
5672
5673     if (surface->current_renderbuffer)
5674     {
5675         FIXME("Not supported with fixed up depth stencil.\n");
5676         return;
5677     }
5678
5679     if (surface->flags & SFLAG_LOST)
5680     {
5681         TRACE("Surface was discarded, no need copy data.\n");
5682         switch (location)
5683         {
5684             case SFLAG_INTEXTURE:
5685                 surface_prepare_texture(surface, context, FALSE);
5686                 break;
5687             case SFLAG_INRB_MULTISAMPLE:
5688                 surface_prepare_rb(surface, context->gl_info, TRUE);
5689                 break;
5690             case SFLAG_INDRAWABLE:
5691                 /* Nothing to do */
5692                 break;
5693             default:
5694                 FIXME("Unhandled location %#x\n", location);
5695         }
5696         surface->flags &= ~SFLAG_LOST;
5697         surface->flags |= location;
5698         surface->ds_current_size.cx = surface->resource.width;
5699         surface->ds_current_size.cy = surface->resource.height;
5700         return;
5701     }
5702
5703     if (!(surface->flags & SFLAG_LOCATIONS))
5704     {
5705         FIXME("No up to date depth stencil location.\n");
5706         surface->flags |= location;
5707         surface->ds_current_size.cx = surface->resource.width;
5708         surface->ds_current_size.cy = surface->resource.height;
5709         return;
5710     }
5711
5712     if (location == SFLAG_INTEXTURE)
5713     {
5714         GLint old_binding = 0;
5715         GLenum bind_target;
5716
5717         /* The render target is allowed to be smaller than the depth/stencil
5718          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5719          * than the offscreen surface. Don't overwrite the offscreen surface
5720          * with undefined data. */
5721         w = min(w, context->swapchain->desc.backbuffer_width);
5722         h = min(h, context->swapchain->desc.backbuffer_height);
5723
5724         TRACE("Copying onscreen depth buffer to depth texture.\n");
5725
5726         ENTER_GL();
5727
5728         if (!device->depth_blt_texture)
5729         {
5730             glGenTextures(1, &device->depth_blt_texture);
5731         }
5732
5733         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5734          * directly on the FBO texture. That's because we need to flip. */
5735         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5736                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5737         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5738         {
5739             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5740             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5741         }
5742         else
5743         {
5744             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5745             bind_target = GL_TEXTURE_2D;
5746         }
5747         glBindTexture(bind_target, device->depth_blt_texture);
5748         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5749          * internal format, because the internal format might include stencil
5750          * data. In principle we should copy stencil data as well, but unless
5751          * the driver supports stencil export it's hard to do, and doesn't
5752          * seem to be needed in practice. If the hardware doesn't support
5753          * writing stencil data, the glCopyTexImage2D() call might trigger
5754          * software fallbacks. */
5755         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5756         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5757         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5758         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5759         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5760         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5761         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5762         glBindTexture(bind_target, old_binding);
5763
5764         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5765                 NULL, surface, SFLAG_INTEXTURE);
5766         context_set_draw_buffer(context, GL_NONE);
5767         glReadBuffer(GL_NONE);
5768
5769         /* Do the actual blit */
5770         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5771         checkGLcall("depth_blt");
5772
5773         context_invalidate_state(context, STATE_FRAMEBUFFER);
5774
5775         LEAVE_GL();
5776
5777         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5778     }
5779     else if (location == SFLAG_INDRAWABLE)
5780     {
5781         TRACE("Copying depth texture to onscreen depth buffer.\n");
5782
5783         ENTER_GL();
5784
5785         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5786                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5787         surface_depth_blt(surface, context, surface->texture_name,
5788                 0, surface->pow2Height - h, w, h, surface->texture_target);
5789         checkGLcall("depth_blt");
5790
5791         context_invalidate_state(context, STATE_FRAMEBUFFER);
5792
5793         LEAVE_GL();
5794
5795         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5796     }
5797     else
5798     {
5799         ERR("Invalid location (%#x) specified.\n", location);
5800     }
5801
5802     surface->flags |= location;
5803     surface->ds_current_size.cx = surface->resource.width;
5804     surface->ds_current_size.cy = surface->resource.height;
5805 }
5806
5807 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5808 {
5809     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5810     struct wined3d_surface *overlay;
5811
5812     TRACE("surface %p, location %s, persistent %#x.\n",
5813             surface, debug_surflocation(location), persistent);
5814
5815     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5816             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5817             && (location & SFLAG_INDRAWABLE))
5818         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5819
5820     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5821             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5822         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5823
5824     if (persistent)
5825     {
5826         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5827                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5828         {
5829             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5830             {
5831                 TRACE("Passing to container.\n");
5832                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5833             }
5834         }
5835         surface->flags &= ~SFLAG_LOCATIONS;
5836         surface->flags |= location;
5837
5838         /* Redraw emulated overlays, if any */
5839         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5840         {
5841             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5842             {
5843                 surface_draw_overlay(overlay);
5844             }
5845         }
5846     }
5847     else
5848     {
5849         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5850         {
5851             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5852             {
5853                 TRACE("Passing to container\n");
5854                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5855             }
5856         }
5857         surface->flags &= ~location;
5858     }
5859
5860     if (!(surface->flags & SFLAG_LOCATIONS))
5861     {
5862         ERR("Surface %p does not have any up to date location.\n", surface);
5863     }
5864 }
5865
5866 static DWORD resource_access_from_location(DWORD location)
5867 {
5868     switch (location)
5869     {
5870         case SFLAG_INSYSMEM:
5871             return WINED3D_RESOURCE_ACCESS_CPU;
5872
5873         case SFLAG_INDRAWABLE:
5874         case SFLAG_INSRGBTEX:
5875         case SFLAG_INTEXTURE:
5876         case SFLAG_INRB_MULTISAMPLE:
5877         case SFLAG_INRB_RESOLVED:
5878             return WINED3D_RESOURCE_ACCESS_GPU;
5879
5880         default:
5881             FIXME("Unhandled location %#x.\n", location);
5882             return 0;
5883     }
5884 }
5885
5886 static void surface_load_sysmem(struct wined3d_surface *surface,
5887         const struct wined3d_gl_info *gl_info, const RECT *rect)
5888 {
5889     surface_prepare_system_memory(surface);
5890
5891     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5892         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5893
5894     /* Download the surface to system memory. */
5895     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5896     {
5897         struct wined3d_device *device = surface->resource.device;
5898         struct wined3d_context *context;
5899
5900         /* TODO: Use already acquired context when possible. */
5901         context = context_acquire(device, NULL);
5902
5903         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5904         surface_download_data(surface, gl_info);
5905
5906         context_release(context);
5907
5908         return;
5909     }
5910
5911     if (surface->flags & SFLAG_INDRAWABLE)
5912     {
5913         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5914                 wined3d_surface_get_pitch(surface));
5915         return;
5916     }
5917
5918     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5919             surface, surface->flags & SFLAG_LOCATIONS);
5920 }
5921
5922 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5923         const struct wined3d_gl_info *gl_info, const RECT *rect)
5924 {
5925     struct wined3d_device *device = surface->resource.device;
5926     struct wined3d_format format;
5927     CONVERT_TYPES convert;
5928     UINT byte_count;
5929     BYTE *mem;
5930
5931     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5932     {
5933         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5934         return WINED3DERR_INVALIDCALL;
5935     }
5936
5937     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5938         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5939
5940     if (surface->flags & SFLAG_INTEXTURE)
5941     {
5942         RECT r;
5943
5944         surface_get_rect(surface, rect, &r);
5945         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5946
5947         return WINED3D_OK;
5948     }
5949
5950     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5951     {
5952         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5953          * path through sysmem. */
5954         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5955     }
5956
5957     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5958
5959     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5960      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5961      * called. */
5962     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5963     {
5964         struct wined3d_context *context;
5965
5966         TRACE("Removing the pbo attached to surface %p.\n", surface);
5967
5968         /* TODO: Use already acquired context when possible. */
5969         context = context_acquire(device, NULL);
5970
5971         surface_remove_pbo(surface, gl_info);
5972
5973         context_release(context);
5974     }
5975
5976     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5977     {
5978         UINT height = surface->resource.height;
5979         UINT width = surface->resource.width;
5980         UINT src_pitch, dst_pitch;
5981
5982         byte_count = format.conv_byte_count;
5983         src_pitch = wined3d_surface_get_pitch(surface);
5984
5985         /* Stick to the alignment for the converted surface too, makes it
5986          * easier to load the surface. */
5987         dst_pitch = width * byte_count;
5988         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5989
5990         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5991         {
5992             ERR("Out of memory (%u).\n", dst_pitch * height);
5993             return E_OUTOFMEMORY;
5994         }
5995
5996         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5997                 src_pitch, width, height, dst_pitch, convert, surface);
5998
5999         surface->flags |= SFLAG_CONVERTED;
6000     }
6001     else
6002     {
6003         surface->flags &= ~SFLAG_CONVERTED;
6004         mem = surface->resource.allocatedMemory;
6005         byte_count = format.byte_count;
6006     }
6007
6008     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
6009
6010     /* Don't delete PBO memory. */
6011     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6012         HeapFree(GetProcessHeap(), 0, mem);
6013
6014     return WINED3D_OK;
6015 }
6016
6017 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6018         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6019 {
6020     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6021     struct wined3d_device *device = surface->resource.device;
6022     struct wined3d_context *context;
6023     UINT width, src_pitch, dst_pitch;
6024     struct wined3d_bo_address data;
6025     struct wined3d_format format;
6026     POINT dst_point = {0, 0};
6027     CONVERT_TYPES convert;
6028     BYTE *mem;
6029
6030     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6031             && surface_is_offscreen(surface)
6032             && (surface->flags & SFLAG_INDRAWABLE))
6033     {
6034         surface_load_fb_texture(surface, srgb);
6035
6036         return WINED3D_OK;
6037     }
6038
6039     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6040             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6041             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6042                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6043                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6044     {
6045         if (srgb)
6046             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6047                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6048         else
6049             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6050                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6051
6052         return WINED3D_OK;
6053     }
6054
6055     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6056             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6057             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6058                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6059                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6060     {
6061         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6062         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
6063         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6064
6065         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6066                 &rect, surface, dst_location, &rect);
6067
6068         return WINED3D_OK;
6069     }
6070
6071     /* Upload from system memory */
6072
6073     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6074             TRUE /* We will use textures */, &format, &convert);
6075
6076     if (srgb)
6077     {
6078         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6079         {
6080             /* Performance warning... */
6081             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6082             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6083         }
6084     }
6085     else
6086     {
6087         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6088         {
6089             /* Performance warning... */
6090             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6091             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6092         }
6093     }
6094
6095     if (!(surface->flags & SFLAG_INSYSMEM))
6096     {
6097         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6098         /* Lets hope we get it from somewhere... */
6099         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6100     }
6101
6102     /* TODO: Use already acquired context when possible. */
6103     context = context_acquire(device, NULL);
6104
6105     surface_prepare_texture(surface, context, srgb);
6106     surface_bind_and_dirtify(surface, context, srgb);
6107
6108     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6109     {
6110         surface->flags |= SFLAG_GLCKEY;
6111         surface->gl_color_key = surface->src_blt_color_key;
6112     }
6113     else surface->flags &= ~SFLAG_GLCKEY;
6114
6115     width = surface->resource.width;
6116     src_pitch = wined3d_surface_get_pitch(surface);
6117
6118     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6119      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6120      * called. */
6121     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6122     {
6123         TRACE("Removing the pbo attached to surface %p.\n", surface);
6124         surface_remove_pbo(surface, gl_info);
6125     }
6126
6127     if (format.convert)
6128     {
6129         /* This code is entered for texture formats which need a fixup. */
6130         UINT height = surface->resource.height;
6131
6132         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6133         dst_pitch = width * format.conv_byte_count;
6134         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6135
6136         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6137         {
6138             ERR("Out of memory (%u).\n", dst_pitch * height);
6139             context_release(context);
6140             return E_OUTOFMEMORY;
6141         }
6142         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6143         format.byte_count = format.conv_byte_count;
6144         src_pitch = dst_pitch;
6145     }
6146     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6147     {
6148         /* This code is only entered for color keying fixups */
6149         UINT height = surface->resource.height;
6150
6151         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6152         dst_pitch = width * format.conv_byte_count;
6153         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6154
6155         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6156         {
6157             ERR("Out of memory (%u).\n", dst_pitch * height);
6158             context_release(context);
6159             return E_OUTOFMEMORY;
6160         }
6161         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6162                 width, height, dst_pitch, convert, surface);
6163         format.byte_count = format.conv_byte_count;
6164         src_pitch = dst_pitch;
6165     }
6166     else
6167     {
6168         mem = surface->resource.allocatedMemory;
6169     }
6170
6171     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6172     data.addr = mem;
6173     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6174
6175     context_release(context);
6176
6177     /* Don't delete PBO memory. */
6178     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6179         HeapFree(GetProcessHeap(), 0, mem);
6180
6181     return WINED3D_OK;
6182 }
6183
6184 static void surface_multisample_resolve(struct wined3d_surface *surface)
6185 {
6186     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6187
6188     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6189         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6190
6191     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6192             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6193 }
6194
6195 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6196 {
6197     struct wined3d_device *device = surface->resource.device;
6198     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6199     HRESULT hr;
6200
6201     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6202
6203     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6204     {
6205         if (location == SFLAG_INTEXTURE)
6206         {
6207             struct wined3d_context *context = context_acquire(device, NULL);
6208             surface_load_ds_location(surface, context, location);
6209             context_release(context);
6210             return WINED3D_OK;
6211         }
6212         else
6213         {
6214             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6215             return WINED3DERR_INVALIDCALL;
6216         }
6217     }
6218
6219     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6220         location = SFLAG_INTEXTURE;
6221
6222     if (surface->flags & location)
6223     {
6224         TRACE("Location already up to date.\n");
6225
6226         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6227                 && surface_need_pbo(surface, gl_info))
6228             surface_load_pbo(surface, gl_info);
6229
6230         return WINED3D_OK;
6231     }
6232
6233     if (WARN_ON(d3d_surface))
6234     {
6235         DWORD required_access = resource_access_from_location(location);
6236         if ((surface->resource.access_flags & required_access) != required_access)
6237             WARN("Operation requires %#x access, but surface only has %#x.\n",
6238                     required_access, surface->resource.access_flags);
6239     }
6240
6241     if (!(surface->flags & SFLAG_LOCATIONS))
6242     {
6243         ERR("Surface %p does not have any up to date location.\n", surface);
6244         surface->flags |= SFLAG_LOST;
6245         return WINED3DERR_DEVICELOST;
6246     }
6247
6248     switch (location)
6249     {
6250         case SFLAG_INSYSMEM:
6251             surface_load_sysmem(surface, gl_info, rect);
6252             break;
6253
6254         case SFLAG_INDRAWABLE:
6255             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6256                 return hr;
6257             break;
6258
6259         case SFLAG_INRB_RESOLVED:
6260             surface_multisample_resolve(surface);
6261             break;
6262
6263         case SFLAG_INTEXTURE:
6264         case SFLAG_INSRGBTEX:
6265             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6266                 return hr;
6267             break;
6268
6269         default:
6270             ERR("Don't know how to handle location %#x.\n", location);
6271             break;
6272     }
6273
6274     if (!rect)
6275     {
6276         surface->flags |= location;
6277
6278         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6279             surface_evict_sysmem(surface);
6280     }
6281
6282     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6283             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6284     {
6285         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6286     }
6287
6288     return WINED3D_OK;
6289 }
6290
6291 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6292 {
6293     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6294
6295     /* Not on a swapchain - must be offscreen */
6296     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6297
6298     /* The front buffer is always onscreen */
6299     if (surface == swapchain->front_buffer) return FALSE;
6300
6301     /* If the swapchain is rendered to an FBO, the backbuffer is
6302      * offscreen, otherwise onscreen */
6303     return swapchain->render_to_fbo;
6304 }
6305
6306 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Release hook of the ffp blitter: intentionally empty.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6309
6310 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6311 /* Context activation is done by the caller. */
6312 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6313 {
6314     BYTE table[256][4];
6315     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6316
6317     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6318
6319     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6320     ENTER_GL();
6321     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6322     LEAVE_GL();
6323 }
6324
6325 /* Context activation is done by the caller. */
6326 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6327 {
6328     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6329
6330     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6331      * else the surface is converted in software at upload time in LoadLocation.
6332      */
6333     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6334             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6335         ffp_blit_p8_upload_palette(surface, context->gl_info);
6336
6337     ENTER_GL();
6338     glEnable(surface->texture_target);
6339     checkGLcall("glEnable(surface->texture_target)");
6340     LEAVE_GL();
6341     return WINED3D_OK;
6342 }
6343
6344 /* Context activation is done by the caller. */
6345 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6346 {
6347     ENTER_GL();
6348     glDisable(GL_TEXTURE_2D);
6349     checkGLcall("glDisable(GL_TEXTURE_2D)");
6350     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6351     {
6352         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6353         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6354     }
6355     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6356     {
6357         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6358         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6359     }
6360     LEAVE_GL();
6361 }
6362
6363 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6364         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6365         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6366 {
6367     enum complex_fixup src_fixup;
6368
6369     switch (blit_op)
6370     {
6371         case WINED3D_BLIT_OP_COLOR_BLIT:
6372             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6373                 return FALSE;
6374
6375             src_fixup = get_complex_fixup(src_format->color_fixup);
6376             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6377             {
6378                 TRACE("Checking support for fixup:\n");
6379                 dump_color_fixup_desc(src_format->color_fixup);
6380             }
6381
6382             if (!is_identity_fixup(dst_format->color_fixup))
6383             {
6384                 TRACE("Destination fixups are not supported\n");
6385                 return FALSE;
6386             }
6387
6388             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6389             {
6390                 TRACE("P8 fixup supported\n");
6391                 return TRUE;
6392             }
6393
6394             /* We only support identity conversions. */
6395             if (is_identity_fixup(src_format->color_fixup))
6396             {
6397                 TRACE("[OK]\n");
6398                 return TRUE;
6399             }
6400
6401             TRACE("[FAILED]\n");
6402             return FALSE;
6403
6404         case WINED3D_BLIT_OP_COLOR_FILL:
6405             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6406                 return FALSE;
6407
6408             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6409             {
6410                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6411                     return FALSE;
6412             }
6413             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6414             {
6415                 TRACE("Color fill not supported\n");
6416                 return FALSE;
6417             }
6418
6419             /* FIXME: We should reject color fills on formats with fixups,
6420              * but this would break P8 color fills for example. */
6421
6422             return TRUE;
6423
6424         case WINED3D_BLIT_OP_DEPTH_FILL:
6425             return TRUE;
6426
6427         default:
6428             TRACE("Unsupported blit_op=%d\n", blit_op);
6429             return FALSE;
6430     }
6431 }
6432
6433 /* Do not call while under the GL lock. */
6434 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6435         const RECT *dst_rect, const struct wined3d_color *color)
6436 {
6437     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6438     struct wined3d_fb_state fb = {&dst_surface, NULL};
6439
6440     return device_clear_render_targets(device, 1, &fb,
6441             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6442 }
6443
6444 /* Do not call while under the GL lock. */
6445 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6446         struct wined3d_surface *surface, const RECT *rect, float depth)
6447 {
6448     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6449     struct wined3d_fb_state fb = {NULL, surface};
6450
6451     return device_clear_render_targets(device, 0, &fb,
6452             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6453 }
6454
6455 const struct blit_shader ffp_blit =  {
6456     ffp_blit_alloc,
6457     ffp_blit_free,
6458     ffp_blit_set,
6459     ffp_blit_unset,
6460     ffp_blit_supported,
6461     ffp_blit_color_fill,
6462     ffp_blit_depth_fill,
6463 };
6464
6465 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6466 {
6467     return WINED3D_OK;
6468 }
6469
/* Release hook of the CPU blitter: intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6474
6475 /* Context activation is done by the caller. */
6476 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6477 {
6478     return WINED3D_OK;
6479 }
6480
/* State teardown hook of the CPU blitter: intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6485
6486 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6487         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6488         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6489 {
6490     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6491     {
6492         return TRUE;
6493     }
6494
6495     return FALSE;
6496 }
6497
6498 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6499         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6500         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6501 {
6502     UINT row_block_count;
6503     const BYTE *src_row;
6504     BYTE *dst_row;
6505     UINT x, y;
6506
6507     src_row = src_data;
6508     dst_row = dst_data;
6509
6510     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6511
6512     if (!flags)
6513     {
6514         for (y = 0; y < update_h; y += format->block_height)
6515         {
6516             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6517             src_row += src_pitch;
6518             dst_row += dst_pitch;
6519         }
6520
6521         return WINED3D_OK;
6522     }
6523
6524     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6525     {
6526         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6527
6528         switch (format->id)
6529         {
6530             case WINED3DFMT_DXT1:
6531                 for (y = 0; y < update_h; y += format->block_height)
6532                 {
6533                     struct block
6534                     {
6535                         WORD color[2];
6536                         BYTE control_row[4];
6537                     };
6538
6539                     const struct block *s = (const struct block *)src_row;
6540                     struct block *d = (struct block *)dst_row;
6541
6542                     for (x = 0; x < row_block_count; ++x)
6543                     {
6544                         d[x].color[0] = s[x].color[0];
6545                         d[x].color[1] = s[x].color[1];
6546                         d[x].control_row[0] = s[x].control_row[3];
6547                         d[x].control_row[1] = s[x].control_row[2];
6548                         d[x].control_row[2] = s[x].control_row[1];
6549                         d[x].control_row[3] = s[x].control_row[0];
6550                     }
6551                     src_row -= src_pitch;
6552                     dst_row += dst_pitch;
6553                 }
6554                 return WINED3D_OK;
6555
6556             case WINED3DFMT_DXT3:
6557                 for (y = 0; y < update_h; y += format->block_height)
6558                 {
6559                     struct block
6560                     {
6561                         WORD alpha_row[4];
6562                         WORD color[2];
6563                         BYTE control_row[4];
6564                     };
6565
6566                     const struct block *s = (const struct block *)src_row;
6567                     struct block *d = (struct block *)dst_row;
6568
6569                     for (x = 0; x < row_block_count; ++x)
6570                     {
6571                         d[x].alpha_row[0] = s[x].alpha_row[3];
6572                         d[x].alpha_row[1] = s[x].alpha_row[2];
6573                         d[x].alpha_row[2] = s[x].alpha_row[1];
6574                         d[x].alpha_row[3] = s[x].alpha_row[0];
6575                         d[x].color[0] = s[x].color[0];
6576                         d[x].color[1] = s[x].color[1];
6577                         d[x].control_row[0] = s[x].control_row[3];
6578                         d[x].control_row[1] = s[x].control_row[2];
6579                         d[x].control_row[2] = s[x].control_row[1];
6580                         d[x].control_row[3] = s[x].control_row[0];
6581                     }
6582                     src_row -= src_pitch;
6583                     dst_row += dst_pitch;
6584                 }
6585                 return WINED3D_OK;
6586
6587             default:
6588                 FIXME("Compressed flip not implemented for format %s.\n",
6589                         debug_d3dformat(format->id));
6590                 return E_NOTIMPL;
6591         }
6592     }
6593
6594     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6595             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6596
6597     return E_NOTIMPL;
6598 }
6599
6600 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6601         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6602         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6603 {
6604     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6605     const struct wined3d_format *src_format, *dst_format;
6606     struct wined3d_surface *orig_src = src_surface;
6607     struct wined3d_mapped_rect dst_map, src_map;
6608     HRESULT hr = WINED3D_OK;
6609     const BYTE *sbuf;
6610     RECT xdst,xsrc;
6611     BYTE *dbuf;
6612     int x, y;
6613
6614     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6615             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6616             flags, fx, debug_d3dtexturefiltertype(filter));
6617
6618     xsrc = *src_rect;
6619
6620     if (!src_surface)
6621     {
6622         RECT full_rect;
6623
6624         full_rect.left = 0;
6625         full_rect.top = 0;
6626         full_rect.right = dst_surface->resource.width;
6627         full_rect.bottom = dst_surface->resource.height;
6628         IntersectRect(&xdst, &full_rect, dst_rect);
6629     }
6630     else
6631     {
6632         BOOL clip_horiz, clip_vert;
6633
6634         xdst = *dst_rect;
6635         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6636         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6637
6638         if (clip_vert || clip_horiz)
6639         {
6640             /* Now check if this is a special case or not... */
6641             if ((flags & WINEDDBLT_DDFX)
6642                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6643                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6644             {
6645                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6646                 return WINED3D_OK;
6647             }
6648
6649             if (clip_horiz)
6650             {
6651                 if (xdst.left < 0)
6652                 {
6653                     xsrc.left -= xdst.left;
6654                     xdst.left = 0;
6655                 }
6656                 if (xdst.right > dst_surface->resource.width)
6657                 {
6658                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6659                     xdst.right = (int)dst_surface->resource.width;
6660                 }
6661             }
6662
6663             if (clip_vert)
6664             {
6665                 if (xdst.top < 0)
6666                 {
6667                     xsrc.top -= xdst.top;
6668                     xdst.top = 0;
6669                 }
6670                 if (xdst.bottom > dst_surface->resource.height)
6671                 {
6672                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6673                     xdst.bottom = (int)dst_surface->resource.height;
6674                 }
6675             }
6676
6677             /* And check if after clipping something is still to be done... */
6678             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6679                     || (xdst.left >= (int)dst_surface->resource.width)
6680                     || (xdst.top >= (int)dst_surface->resource.height)
6681                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6682                     || (xsrc.left >= (int)src_surface->resource.width)
6683                     || (xsrc.top >= (int)src_surface->resource.height))
6684             {
6685                 TRACE("Nothing to be done after clipping.\n");
6686                 return WINED3D_OK;
6687             }
6688         }
6689     }
6690
6691     if (src_surface == dst_surface)
6692     {
6693         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6694         src_map = dst_map;
6695         src_format = dst_surface->resource.format;
6696         dst_format = src_format;
6697     }
6698     else
6699     {
6700         dst_format = dst_surface->resource.format;
6701         if (src_surface)
6702         {
6703             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6704             {
6705                 src_surface = surface_convert_format(src_surface, dst_format->id);
6706                 if (!src_surface)
6707                 {
6708                     /* The conv function writes a FIXME */
6709                     WARN("Cannot convert source surface format to dest format.\n");
6710                     goto release;
6711                 }
6712             }
6713             wined3d_surface_map(src_surface, &src_map, NULL, WINED3DLOCK_READONLY);
6714             src_format = src_surface->resource.format;
6715         }
6716         else
6717         {
6718             src_format = dst_format;
6719         }
6720         if (dst_rect)
6721             wined3d_surface_map(dst_surface, &dst_map, &xdst, 0);
6722         else
6723             wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6724     }
6725
6726     bpp = dst_surface->resource.format->byte_count;
6727     srcheight = xsrc.bottom - xsrc.top;
6728     srcwidth = xsrc.right - xsrc.left;
6729     dstheight = xdst.bottom - xdst.top;
6730     dstwidth = xdst.right - xdst.left;
6731     width = (xdst.right - xdst.left) * bpp;
6732
6733     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6734     {
6735         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6736
6737         if (src_surface == dst_surface)
6738         {
6739             FIXME("Only plain blits supported on compressed surfaces.\n");
6740             hr = E_NOTIMPL;
6741             goto release;
6742         }
6743
6744         if (srcheight != dstheight || srcwidth != dstwidth)
6745         {
6746             WARN("Stretching not supported on compressed surfaces.\n");
6747             hr = WINED3DERR_INVALIDCALL;
6748             goto release;
6749         }
6750
6751         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6752         {
6753             WARN("Rectangle not block-aligned.\n");
6754             hr = WINED3DERR_INVALIDCALL;
6755             goto release;
6756         }
6757
6758         hr = surface_cpu_blt_compressed(src_map.data, dst_map.data,
6759                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6760                 src_format, flags, fx);
6761         goto release;
6762     }
6763
6764     if (dst_rect && src_surface != dst_surface)
6765         dbuf = dst_map.data;
6766     else
6767         dbuf = (BYTE *)dst_map.data + (xdst.top * dst_map.row_pitch) + (xdst.left * bpp);
6768
6769     /* First, all the 'source-less' blits */
6770     if (flags & WINEDDBLT_COLORFILL)
6771     {
6772         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6773         flags &= ~WINEDDBLT_COLORFILL;
6774     }
6775
6776     if (flags & WINEDDBLT_DEPTHFILL)
6777     {
6778         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6779     }
6780     if (flags & WINEDDBLT_ROP)
6781     {
6782         /* Catch some degenerate cases here. */
6783         switch (fx->dwROP)
6784         {
6785             case BLACKNESS:
6786                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6787                 break;
6788             case 0xAA0029: /* No-op */
6789                 break;
6790             case WHITENESS:
6791                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6792                 break;
6793             case SRCCOPY: /* Well, we do that below? */
6794                 break;
6795             default:
6796                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6797                 goto error;
6798         }
6799         flags &= ~WINEDDBLT_ROP;
6800     }
6801     if (flags & WINEDDBLT_DDROPS)
6802     {
6803         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6804     }
6805     /* Now the 'with source' blits. */
6806     if (src_surface)
6807     {
6808         const BYTE *sbase;
6809         int sx, xinc, sy, yinc;
6810
6811         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6812             goto release;
6813
6814         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6815                 && (srcwidth != dstwidth || srcheight != dstheight))
6816         {
6817             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6818             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6819         }
6820
6821         sbase = (BYTE *)src_map.data + (xsrc.top * src_map.row_pitch) + xsrc.left * bpp;
6822         xinc = (srcwidth << 16) / dstwidth;
6823         yinc = (srcheight << 16) / dstheight;
6824
6825         if (!flags)
6826         {
6827             /* No effects, we can cheat here. */
6828             if (dstwidth == srcwidth)
6829             {
6830                 if (dstheight == srcheight)
6831                 {
6832                     /* No stretching in either direction. This needs to be as
6833                      * fast as possible. */
6834                     sbuf = sbase;
6835
6836                     /* Check for overlapping surfaces. */
6837                     if (src_surface != dst_surface || xdst.top < xsrc.top
6838                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6839                     {
6840                         /* No overlap, or dst above src, so copy from top downwards. */
6841                         for (y = 0; y < dstheight; ++y)
6842                         {
6843                             memcpy(dbuf, sbuf, width);
6844                             sbuf += src_map.row_pitch;
6845                             dbuf += dst_map.row_pitch;
6846                         }
6847                     }
6848                     else if (xdst.top > xsrc.top)
6849                     {
6850                         /* Copy from bottom upwards. */
6851                         sbuf += src_map.row_pitch * dstheight;
6852                         dbuf += dst_map.row_pitch * dstheight;
6853                         for (y = 0; y < dstheight; ++y)
6854                         {
6855                             sbuf -= src_map.row_pitch;
6856                             dbuf -= dst_map.row_pitch;
6857                             memcpy(dbuf, sbuf, width);
6858                         }
6859                     }
6860                     else
6861                     {
6862                         /* Src and dst overlapping on the same line, use memmove. */
6863                         for (y = 0; y < dstheight; ++y)
6864                         {
6865                             memmove(dbuf, sbuf, width);
6866                             sbuf += src_map.row_pitch;
6867                             dbuf += dst_map.row_pitch;
6868                         }
6869                     }
6870                 }
6871                 else
6872                 {
6873                     /* Stretching in y direction only. */
6874                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6875                     {
6876                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6877                         memcpy(dbuf, sbuf, width);
6878                         dbuf += dst_map.row_pitch;
6879                     }
6880                 }
6881             }
6882             else
6883             {
6884                 /* Stretching in X direction. */
6885                 int last_sy = -1;
6886                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6887                 {
6888                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6889
6890                     if ((sy >> 16) == (last_sy >> 16))
6891                     {
6892                         /* This source row is the same as last source row -
6893                          * Copy the already stretched row. */
6894                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6895                     }
6896                     else
6897                     {
6898 #define STRETCH_ROW(type) \
6899 do { \
6900     const type *s = (const type *)sbuf; \
6901     type *d = (type *)dbuf; \
6902     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6903         d[x] = s[sx >> 16]; \
6904 } while(0)
6905
6906                         switch(bpp)
6907                         {
6908                             case 1:
6909                                 STRETCH_ROW(BYTE);
6910                                 break;
6911                             case 2:
6912                                 STRETCH_ROW(WORD);
6913                                 break;
6914                             case 4:
6915                                 STRETCH_ROW(DWORD);
6916                                 break;
6917                             case 3:
6918                             {
6919                                 const BYTE *s;
6920                                 BYTE *d = dbuf;
6921                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6922                                 {
6923                                     DWORD pixel;
6924
6925                                     s = sbuf + 3 * (sx >> 16);
6926                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6927                                     d[0] = (pixel      ) & 0xff;
6928                                     d[1] = (pixel >>  8) & 0xff;
6929                                     d[2] = (pixel >> 16) & 0xff;
6930                                     d += 3;
6931                                 }
6932                                 break;
6933                             }
6934                             default:
6935                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6936                                 hr = WINED3DERR_NOTAVAILABLE;
6937                                 goto error;
6938                         }
6939 #undef STRETCH_ROW
6940                     }
6941                     dbuf += dst_map.row_pitch;
6942                     last_sy = sy;
6943                 }
6944             }
6945         }
6946         else
6947         {
6948             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6949             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6950             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6951             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6952             {
6953                 /* The color keying flags are checked for correctness in ddraw */
6954                 if (flags & WINEDDBLT_KEYSRC)
6955                 {
6956                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6957                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6958                 }
6959                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6960                 {
6961                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6962                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6963                 }
6964
6965                 if (flags & WINEDDBLT_KEYDEST)
6966                 {
6967                     /* Destination color keys are taken from the source surface! */
6968                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6969                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6970                 }
6971                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6972                 {
6973                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6974                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6975                 }
6976
6977                 if (bpp == 1)
6978                 {
6979                     keymask = 0xff;
6980                 }
6981                 else
6982                 {
6983                     keymask = src_format->red_mask
6984                             | src_format->green_mask
6985                             | src_format->blue_mask;
6986                 }
6987                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6988             }
6989
6990             if (flags & WINEDDBLT_DDFX)
6991             {
6992                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6993                 LONG tmpxy;
6994                 dTopLeft     = dbuf;
6995                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6996                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6997                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6998
6999                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
7000                 {
7001                     /* I don't think we need to do anything about this flag */
7002                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
7003                 }
7004                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
7005                 {
7006                     tmp          = dTopRight;
7007                     dTopRight    = dTopLeft;
7008                     dTopLeft     = tmp;
7009                     tmp          = dBottomRight;
7010                     dBottomRight = dBottomLeft;
7011                     dBottomLeft  = tmp;
7012                     dstxinc = dstxinc * -1;
7013                 }
7014                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
7015                 {
7016                     tmp          = dTopLeft;
7017                     dTopLeft     = dBottomLeft;
7018                     dBottomLeft  = tmp;
7019                     tmp          = dTopRight;
7020                     dTopRight    = dBottomRight;
7021                     dBottomRight = tmp;
7022                     dstyinc = dstyinc * -1;
7023                 }
7024                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
7025                 {
7026                     /* I don't think we need to do anything about this flag */
7027                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
7028                 }
7029                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
7030                 {
7031                     tmp          = dBottomRight;
7032                     dBottomRight = dTopLeft;
7033                     dTopLeft     = tmp;
7034                     tmp          = dBottomLeft;
7035                     dBottomLeft  = dTopRight;
7036                     dTopRight    = tmp;
7037                     dstxinc = dstxinc * -1;
7038                     dstyinc = dstyinc * -1;
7039                 }
7040                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
7041                 {
7042                     tmp          = dTopLeft;
7043                     dTopLeft     = dBottomLeft;
7044                     dBottomLeft  = dBottomRight;
7045                     dBottomRight = dTopRight;
7046                     dTopRight    = tmp;
7047                     tmpxy   = dstxinc;
7048                     dstxinc = dstyinc;
7049                     dstyinc = tmpxy;
7050                     dstxinc = dstxinc * -1;
7051                 }
7052                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
7053                 {
7054                     tmp          = dTopLeft;
7055                     dTopLeft     = dTopRight;
7056                     dTopRight    = dBottomRight;
7057                     dBottomRight = dBottomLeft;
7058                     dBottomLeft  = tmp;
7059                     tmpxy   = dstxinc;
7060                     dstxinc = dstyinc;
7061                     dstyinc = tmpxy;
7062                     dstyinc = dstyinc * -1;
7063                 }
7064                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
7065                 {
7066                     /* I don't think we need to do anything about this flag */
7067                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
7068                 }
7069                 dbuf = dTopLeft;
7070                 flags &= ~(WINEDDBLT_DDFX);
7071             }
7072
7073 #define COPY_COLORKEY_FX(type) \
7074 do { \
7075     const type *s; \
7076     type *d = (type *)dbuf, *dx, tmp; \
7077     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7078     { \
7079         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7080         dx = d; \
7081         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7082         { \
7083             tmp = s[sx >> 16]; \
7084             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7085                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7086             { \
7087                 dx[0] = tmp; \
7088             } \
7089             dx = (type *)(((BYTE *)dx) + dstxinc); \
7090         } \
7091         d = (type *)(((BYTE *)d) + dstyinc); \
7092     } \
7093 } while(0)
7094
7095             switch (bpp)
7096             {
7097                 case 1:
7098                     COPY_COLORKEY_FX(BYTE);
7099                     break;
7100                 case 2:
7101                     COPY_COLORKEY_FX(WORD);
7102                     break;
7103                 case 4:
7104                     COPY_COLORKEY_FX(DWORD);
7105                     break;
7106                 case 3:
7107                 {
7108                     const BYTE *s;
7109                     BYTE *d = dbuf, *dx;
7110                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7111                     {
7112                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7113                         dx = d;
7114                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7115                         {
7116                             DWORD pixel, dpixel = 0;
7117                             s = sbuf + 3 * (sx>>16);
7118                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7119                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7120                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7121                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7122                             {
7123                                 dx[0] = (pixel      ) & 0xff;
7124                                 dx[1] = (pixel >>  8) & 0xff;
7125                                 dx[2] = (pixel >> 16) & 0xff;
7126                             }
7127                             dx += dstxinc;
7128                         }
7129                         d += dstyinc;
7130                     }
7131                     break;
7132                 }
7133                 default:
7134                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7135                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7136                     hr = WINED3DERR_NOTAVAILABLE;
7137                     goto error;
7138 #undef COPY_COLORKEY_FX
7139             }
7140         }
7141     }
7142
7143 error:
7144     if (flags && FIXME_ON(d3d_surface))
7145     {
7146         FIXME("\tUnsupported flags: %#x.\n", flags);
7147     }
7148
7149 release:
7150     wined3d_surface_unmap(dst_surface);
7151     if (src_surface && src_surface != dst_surface)
7152         wined3d_surface_unmap(src_surface);
7153     /* Release the converted surface, if any. */
7154     if (src_surface && src_surface != orig_src)
7155         wined3d_surface_decref(src_surface);
7156
7157     return hr;
7158 }
7159
7160 /* Do not call while under the GL lock. */
7161 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7162         const RECT *dst_rect, const struct wined3d_color *color)
7163 {
7164     static const RECT src_rect;
7165     WINEDDBLTFX BltFx;
7166
7167     memset(&BltFx, 0, sizeof(BltFx));
7168     BltFx.dwSize = sizeof(BltFx);
7169     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7170     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7171             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7172 }
7173
7174 /* Do not call while under the GL lock. */
7175 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7176         struct wined3d_surface *surface, const RECT *rect, float depth)
7177 {
7178     FIXME("Depth filling not implemented by cpu_blit.\n");
7179     return WINED3DERR_INVALIDCALL;
7180 }
7181
/* Blitter implementation backed by the CPU code paths in this file.
 *
 * The initializer is positional, so the entries have to stay in the member
 * order of struct blit_shader. That structure is declared outside this part
 * of the file - check its declaration before reordering or inserting
 * entries. */
const struct blit_shader cpu_blit =  {
    cpu_blit_alloc,
    cpu_blit_free,
    cpu_blit_set,
    cpu_blit_unset,
    cpu_blit_supported,
    cpu_blit_color_fill,    /* Solid fill through surface_cpu_blt() with WINEDDBLT_COLORFILL. */
    cpu_blit_depth_fill,    /* Stub: logs a FIXME and returns WINED3DERR_INVALIDCALL. */
};
7191
7192 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7193         UINT width, UINT height, UINT level, enum wined3d_multisample_type multisample_type,
7194         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7195         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7196 {
7197     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7198     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7199     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7200     unsigned int resource_size;
7201     HRESULT hr;
7202
7203     if (multisample_quality > 0)
7204     {
7205         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7206         multisample_quality = 0;
7207     }
7208
7209     /* Quick lockable sanity check.
7210      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7211      * this function is too deep to need to care about things like this.
7212      * Levels need to be checked too, since they all affect what can be done. */
7213     switch (pool)
7214     {
7215         case WINED3D_POOL_SCRATCH:
7216             if (!lockable)
7217             {
7218                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7219                         "which are mutually exclusive, setting lockable to TRUE.\n");
7220                 lockable = TRUE;
7221             }
7222             break;
7223
7224         case WINED3D_POOL_SYSTEM_MEM:
7225             if (!lockable)
7226                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7227             break;
7228
7229         case WINED3D_POOL_MANAGED:
7230             if (usage & WINED3DUSAGE_DYNAMIC)
7231                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7232             break;
7233
7234         case WINED3D_POOL_DEFAULT:
7235             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7236                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7237             break;
7238
7239         default:
7240             FIXME("Unknown pool %#x.\n", pool);
7241             break;
7242     };
7243
7244     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7245         FIXME("Trying to create a render target that isn't in the default pool.\n");
7246
7247     /* FIXME: Check that the format is supported by the device. */
7248
7249     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7250     if (!resource_size)
7251         return WINED3DERR_INVALIDCALL;
7252
7253     surface->surface_type = surface_type;
7254
7255     switch (surface_type)
7256     {
7257         case SURFACE_OPENGL:
7258             surface->surface_ops = &surface_ops;
7259             break;
7260
7261         case SURFACE_GDI:
7262             surface->surface_ops = &gdi_surface_ops;
7263             break;
7264
7265         default:
7266             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7267             return WINED3DERR_INVALIDCALL;
7268     }
7269
7270     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7271             multisample_type, multisample_quality, usage, pool, width, height, 1,
7272             resource_size, parent, parent_ops, &surface_resource_ops);
7273     if (FAILED(hr))
7274     {
7275         WARN("Failed to initialize resource, returning %#x.\n", hr);
7276         return hr;
7277     }
7278
7279     /* "Standalone" surface. */
7280     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7281
7282     surface->texture_level = level;
7283     list_init(&surface->overlays);
7284
7285     /* Flags */
7286     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7287     if (flags & WINED3D_SURFACE_DISCARD)
7288         surface->flags |= SFLAG_DISCARD;
7289     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7290         surface->flags |= SFLAG_PIN_SYSMEM;
7291     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7292         surface->flags |= SFLAG_LOCKABLE;
7293     /* I'm not sure if this qualifies as a hack or as an optimization. It
7294      * seems reasonable to assume that lockable render targets will get
7295      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7296      * creation. However, the other reason we want to do this is that several
7297      * ddraw applications access surface memory while the surface isn't
7298      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7299      * future locks prevents these from crashing. */
7300     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7301         surface->flags |= SFLAG_DYNLOCK;
7302
7303     /* Mark the texture as dirty so that it gets loaded first time around. */
7304     surface_add_dirty_rect(surface, NULL);
7305     list_init(&surface->renderbuffers);
7306
7307     TRACE("surface %p, memory %p, size %u\n",
7308             surface, surface->resource.allocatedMemory, surface->resource.size);
7309
7310     /* Call the private setup routine */
7311     hr = surface->surface_ops->surface_private_setup(surface);
7312     if (FAILED(hr))
7313     {
7314         ERR("Private setup failed, returning %#x\n", hr);
7315         surface_cleanup(surface);
7316         return hr;
7317     }
7318
7319     /* Similar to lockable rendertargets above, creating the DIB section
7320      * during surface initialization prevents the sysmem pointer from changing
7321      * after a wined3d_surface_getdc() call. */
7322     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7323             && SUCCEEDED(surface_create_dib_section(surface)))
7324     {
7325         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7326         surface->resource.heapMemory = NULL;
7327         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7328     }
7329
7330     return hr;
7331 }
7332
7333 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7334         enum wined3d_format_id format_id, UINT level, DWORD usage, enum wined3d_pool pool,
7335         enum wined3d_multisample_type multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7336         DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7337 {
7338     struct wined3d_surface *object;
7339     HRESULT hr;
7340
7341     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7342             device, width, height, debug_d3dformat(format_id), level);
7343     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7344             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7345     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7346
7347     if (surface_type == SURFACE_OPENGL && !device->adapter)
7348     {
7349         ERR("OpenGL surfaces are not available without OpenGL.\n");
7350         return WINED3DERR_NOTAVAILABLE;
7351     }
7352
7353     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7354     if (!object)
7355     {
7356         ERR("Failed to allocate surface memory.\n");
7357         return WINED3DERR_OUTOFVIDEOMEMORY;
7358     }
7359
7360     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7361             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7362     if (FAILED(hr))
7363     {
7364         WARN("Failed to initialize surface, returning %#x.\n", hr);
7365         HeapFree(GetProcessHeap(), 0, object);
7366         return hr;
7367     }
7368
7369     TRACE("Created surface %p.\n", object);
7370     *surface = object;
7371
7372     return hr;
7373 }