d3d9: Don't expose wined3d internal flags to the application.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
164 struct blt_info
165 {
166     GLenum binding;
167     GLenum bind_target;
168     enum tex_types tex_type;
169     GLfloat coords[4][3];
170 };
171
/* Floating point rectangle edges, in the order left, top, right, bottom. */
struct float_rect
{
    float l, t;     /* left / top */
    float r, b;     /* right / bottom */
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, Filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380     UINT usage;
381     HDC dc;
382
383     TRACE("surface %p.\n", surface);
384
385     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
386     {
387         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
388         return WINED3DERR_INVALIDCALL;
389     }
390
391     switch (format->byte_count)
392     {
393         case 2:
394         case 4:
395             /* Allocate extra space to store the RGB bit masks. */
396             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
397             break;
398
399         case 3:
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
401             break;
402
403         default:
404             /* Allocate extra space for a palette. */
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
406                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
407             break;
408     }
409
410     if (!b_info)
411         return E_OUTOFMEMORY;
412
413     /* Some applications access the surface in via DWORDs, and do not take
414      * the necessary care at the end of the surface. So we need at least
415      * 4 extra bytes at the end of the surface. Check against the page size,
416      * if the last page used for the surface has at least 4 spare bytes we're
417      * safe, otherwise add an extra line to the DIB section. */
418     GetSystemInfo(&sysInfo);
419     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
420     {
421         extraline = 1;
422         TRACE("Adding an extra line to the DIB section.\n");
423     }
424
425     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
426     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
427     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
428     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
429     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
430             * wined3d_surface_get_pitch(surface);
431     b_info->bmiHeader.biPlanes = 1;
432     b_info->bmiHeader.biBitCount = format->byte_count * 8;
433
434     b_info->bmiHeader.biXPelsPerMeter = 0;
435     b_info->bmiHeader.biYPelsPerMeter = 0;
436     b_info->bmiHeader.biClrUsed = 0;
437     b_info->bmiHeader.biClrImportant = 0;
438
439     /* Get the bit masks */
440     masks = (DWORD *)b_info->bmiColors;
441     switch (surface->resource.format->id)
442     {
443         case WINED3DFMT_B8G8R8_UNORM:
444             usage = DIB_RGB_COLORS;
445             b_info->bmiHeader.biCompression = BI_RGB;
446             break;
447
448         case WINED3DFMT_B5G5R5X1_UNORM:
449         case WINED3DFMT_B5G5R5A1_UNORM:
450         case WINED3DFMT_B4G4R4A4_UNORM:
451         case WINED3DFMT_B4G4R4X4_UNORM:
452         case WINED3DFMT_B2G3R3_UNORM:
453         case WINED3DFMT_B2G3R3A8_UNORM:
454         case WINED3DFMT_R10G10B10A2_UNORM:
455         case WINED3DFMT_R8G8B8A8_UNORM:
456         case WINED3DFMT_R8G8B8X8_UNORM:
457         case WINED3DFMT_B10G10R10A2_UNORM:
458         case WINED3DFMT_B5G6R5_UNORM:
459         case WINED3DFMT_R16G16B16A16_UNORM:
460             usage = 0;
461             b_info->bmiHeader.biCompression = BI_BITFIELDS;
462             masks[0] = format->red_mask;
463             masks[1] = format->green_mask;
464             masks[2] = format->blue_mask;
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             usage = 0;
471             break;
472     }
473
474     if (!(dc = GetDC(0)))
475     {
476         HeapFree(GetProcessHeap(), 0, b_info);
477         return HRESULT_FROM_WIN32(GetLastError());
478     }
479
480     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
481             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
482             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
483     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
484     ReleaseDC(0, dc);
485
486     if (!surface->dib.DIBsection)
487     {
488         ERR("Failed to create DIB section.\n");
489         HeapFree(GetProcessHeap(), 0, b_info);
490         return HRESULT_FROM_WIN32(GetLastError());
491     }
492
493     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
494     /* Copy the existing surface to the dib section. */
495     if (surface->resource.allocatedMemory)
496     {
497         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
498                 surface->resource.height * wined3d_surface_get_pitch(surface));
499     }
500     else
501     {
502         /* This is to make maps read the GL texture although memory is allocated. */
503         surface->flags &= ~SFLAG_INSYSMEM;
504     }
505     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
506
507     HeapFree(GetProcessHeap(), 0, b_info);
508
509     /* Now allocate a DC. */
510     surface->hDC = CreateCompatibleDC(0);
511     SelectObject(surface->hDC, surface->dib.DIBsection);
512     TRACE("Using wined3d palette %p.\n", surface->palette);
513     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
514
515     surface->flags |= SFLAG_DIBSECTION;
516
517     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
518     surface->resource.heapMemory = NULL;
519
520     return WINED3D_OK;
521 }
522
523 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
524 {
525     if (surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
526         return FALSE;
527     if (!(surface->flags & SFLAG_DYNLOCK))
528         return FALSE;
529     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
530         return FALSE;
531     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
532         return FALSE;
533
534     return TRUE;
535 }
536
537 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
538 {
539     struct wined3d_context *context;
540     GLenum error;
541
542     context = context_acquire(surface->resource.device, NULL);
543     ENTER_GL();
544
545     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
546     error = glGetError();
547     if (!surface->pbo || error != GL_NO_ERROR)
548         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
549
550     TRACE("Binding PBO %u.\n", surface->pbo);
551
552     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
553     checkGLcall("glBindBufferARB");
554
555     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
556             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
557     checkGLcall("glBufferDataARB");
558
559     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
560     checkGLcall("glBindBufferARB");
561
562     /* We don't need the system memory anymore and we can't even use it for PBOs. */
563     if (!(surface->flags & SFLAG_CLIENT))
564     {
565         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
566         surface->resource.heapMemory = NULL;
567     }
568     surface->resource.allocatedMemory = NULL;
569     surface->flags |= SFLAG_PBO;
570     LEAVE_GL();
571     context_release(context);
572 }
573
574 static void surface_prepare_system_memory(struct wined3d_surface *surface)
575 {
576     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
577
578     TRACE("surface %p.\n", surface);
579
580     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
581         surface_load_pbo(surface, gl_info);
582     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
583     {
584         /* Whatever surface we have, make sure that there is memory allocated
585          * for the downloaded copy, or a PBO to map. */
586         if (!surface->resource.heapMemory)
587             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
588
589         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
590                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
591
592         if (surface->flags & SFLAG_INSYSMEM)
593             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
594     }
595 }
596
597 static void surface_evict_sysmem(struct wined3d_surface *surface)
598 {
599     if (surface->flags & SFLAG_DONOTFREE)
600         return;
601
602     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
603     surface->resource.allocatedMemory = NULL;
604     surface->resource.heapMemory = NULL;
605     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
606 }
607
608 /* Context activation is done by the caller. */
609 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
610         struct wined3d_context *context, BOOL srgb)
611 {
612     struct wined3d_device *device = surface->resource.device;
613     DWORD active_sampler;
614
615     /* We don't need a specific texture unit, but after binding the texture
616      * the current unit is dirty. Read the unit back instead of switching to
617      * 0, this avoids messing around with the state manager's GL states. The
618      * current texture unit should always be a valid one.
619      *
620      * To be more specific, this is tricky because we can implicitly be
621      * called from sampler() in state.c. This means we can't touch anything
622      * other than whatever happens to be the currently active texture, or we
623      * would risk marking already applied sampler states dirty again. */
624     active_sampler = device->rev_tex_unit_map[context->active_texture];
625
626     if (active_sampler != WINED3D_UNMAPPED_STAGE)
627         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
628     surface_bind(surface, context, srgb);
629 }
630
631 static void surface_force_reload(struct wined3d_surface *surface)
632 {
633     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
634 }
635
636 static void surface_release_client_storage(struct wined3d_surface *surface)
637 {
638     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
639
640     ENTER_GL();
641     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
642     if (surface->texture_name)
643     {
644         surface_bind_and_dirtify(surface, context, FALSE);
645         glTexImage2D(surface->texture_target, surface->texture_level,
646                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
647     }
648     if (surface->texture_name_srgb)
649     {
650         surface_bind_and_dirtify(surface, context, TRUE);
651         glTexImage2D(surface->texture_target, surface->texture_level,
652                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
653     }
654     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
655     LEAVE_GL();
656
657     context_release(context);
658
659     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
660     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
661     surface_force_reload(surface);
662 }
663
664 static HRESULT surface_private_setup(struct wined3d_surface *surface)
665 {
666     /* TODO: Check against the maximum texture sizes supported by the video card. */
667     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
668     unsigned int pow2Width, pow2Height;
669
670     TRACE("surface %p.\n", surface);
671
672     surface->texture_name = 0;
673     surface->texture_target = GL_TEXTURE_2D;
674
675     /* Non-power2 support */
676     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
677     {
678         pow2Width = surface->resource.width;
679         pow2Height = surface->resource.height;
680     }
681     else
682     {
683         /* Find the nearest pow2 match */
684         pow2Width = pow2Height = 1;
685         while (pow2Width < surface->resource.width)
686             pow2Width <<= 1;
687         while (pow2Height < surface->resource.height)
688             pow2Height <<= 1;
689     }
690     surface->pow2Width = pow2Width;
691     surface->pow2Height = pow2Height;
692
693     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
694     {
695         /* TODO: Add support for non power two compressed textures. */
696         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
697         {
698             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
699                   surface, surface->resource.width, surface->resource.height);
700             return WINED3DERR_NOTAVAILABLE;
701         }
702     }
703
704     if (pow2Width != surface->resource.width
705             || pow2Height != surface->resource.height)
706     {
707         surface->flags |= SFLAG_NONPOW2;
708     }
709
710     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
711             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
712     {
713         /* One of three options:
714          * 1: Do the same as we do with NPOT and scale the texture, (any
715          *    texture ops would require the texture to be scaled which is
716          *    potentially slow)
717          * 2: Set the texture to the maximum size (bad idea).
718          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
719          * 4: Create the surface, but allow it to be used only for DirectDraw
720          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
721          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
722          *    the render target. */
723         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
724         {
725             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
726             return WINED3DERR_NOTAVAILABLE;
727         }
728
729         /* We should never use this surface in combination with OpenGL! */
730         TRACE("Creating an oversized surface: %ux%u.\n",
731                 surface->pow2Width, surface->pow2Height);
732     }
733     else
734     {
735         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
736          * and EXT_PALETTED_TEXTURE is used in combination with texture
737          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
738          * EXT_PALETTED_TEXTURE doesn't work in combination with
739          * ARB_TEXTURE_RECTANGLE. */
740         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
741                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
742                 && gl_info->supported[EXT_PALETTED_TEXTURE]
743                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
744         {
745             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
746             surface->pow2Width = surface->resource.width;
747             surface->pow2Height = surface->resource.height;
748             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
749         }
750     }
751
752     switch (wined3d_settings.offscreen_rendering_mode)
753     {
754         case ORM_FBO:
755             surface->get_drawable_size = get_drawable_size_fbo;
756             break;
757
758         case ORM_BACKBUFFER:
759             surface->get_drawable_size = get_drawable_size_backbuffer;
760             break;
761
762         default:
763             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
764             return WINED3DERR_INVALIDCALL;
765     }
766
767     surface->flags |= SFLAG_INSYSMEM;
768
769     return WINED3D_OK;
770 }
771
772 static void surface_realize_palette(struct wined3d_surface *surface)
773 {
774     struct wined3d_palette *palette = surface->palette;
775
776     TRACE("surface %p.\n", surface);
777
778     if (!palette) return;
779
780     if (surface->resource.format->id == WINED3DFMT_P8_UINT
781             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
782     {
783         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
784         {
785             /* Make sure the texture is up to date. This call doesn't do
786              * anything if the texture is already up to date. */
787             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
788
789             /* We want to force a palette refresh, so mark the drawable as not being up to date */
790             if (!surface_is_offscreen(surface))
791                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
792         }
793         else
794         {
795             if (!(surface->flags & SFLAG_INSYSMEM))
796             {
797                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
798                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
799             }
800             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
801         }
802     }
803
804     if (surface->flags & SFLAG_DIBSECTION)
805     {
806         RGBQUAD col[256];
807         unsigned int i;
808
809         TRACE("Updating the DC's palette.\n");
810
811         for (i = 0; i < 256; ++i)
812         {
813             col[i].rgbRed   = palette->palents[i].peRed;
814             col[i].rgbGreen = palette->palents[i].peGreen;
815             col[i].rgbBlue  = palette->palents[i].peBlue;
816             col[i].rgbReserved = 0;
817         }
818         SetDIBColorTable(surface->hDC, 0, 256, col);
819     }
820
821     /* Propagate the changes to the drawable when we have a palette. */
822     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
823         surface_load_location(surface, surface->draw_binding, NULL);
824 }
825
826 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
827 {
828     HRESULT hr;
829
830     /* If there's no destination surface there is nothing to do. */
831     if (!surface->overlay_dest)
832         return WINED3D_OK;
833
834     /* Blt calls ModifyLocation on the dest surface, which in turn calls
835      * DrawOverlay to update the overlay. Prevent an endless recursion. */
836     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
837         return WINED3D_OK;
838
839     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
840     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
841             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
842     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
843
844     return hr;
845 }
846
847 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
848 {
849     struct wined3d_device *device = surface->resource.device;
850     const RECT *pass_rect = rect;
851
852     TRACE("surface %p, rect %s, flags %#x.\n",
853             surface, wine_dbgstr_rect(rect), flags);
854
855     if (flags & WINED3DLOCK_DISCARD)
856     {
857         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
858         surface_prepare_system_memory(surface);
859         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
860     }
861     else
862     {
863         /* surface_load_location() does not check if the rectangle specifies
864          * the full surface. Most callers don't need that, so do it here. */
865         if (rect && !rect->top && !rect->left
866                 && rect->right == surface->resource.width
867                 && rect->bottom == surface->resource.height)
868             pass_rect = NULL;
869         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
870     }
871
872     if (surface->flags & SFLAG_PBO)
873     {
874         const struct wined3d_gl_info *gl_info;
875         struct wined3d_context *context;
876
877         context = context_acquire(device, NULL);
878         gl_info = context->gl_info;
879
880         ENTER_GL();
881         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
882         checkGLcall("glBindBufferARB");
883
884         /* This shouldn't happen but could occur if some other function
885          * didn't handle the PBO properly. */
886         if (surface->resource.allocatedMemory)
887             ERR("The surface already has PBO memory allocated.\n");
888
889         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
890         checkGLcall("glMapBufferARB");
891
892         /* Make sure the PBO isn't set anymore in order not to break non-PBO
893          * calls. */
894         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
895         checkGLcall("glBindBufferARB");
896
897         LEAVE_GL();
898         context_release(context);
899     }
900
901     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
902     {
903         if (!rect)
904             surface_add_dirty_rect(surface, NULL);
905         else
906         {
907             struct wined3d_box b;
908
909             b.left = rect->left;
910             b.top = rect->top;
911             b.right = rect->right;
912             b.bottom = rect->bottom;
913             b.front = 0;
914             b.back = 1;
915             surface_add_dirty_rect(surface, &b);
916         }
917     }
918 }
919
920 static void surface_unmap(struct wined3d_surface *surface)
921 {
922     struct wined3d_device *device = surface->resource.device;
923     BOOL fullsurface;
924
925     TRACE("surface %p.\n", surface);
926
927     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
928
929     if (surface->flags & SFLAG_PBO)
930     {
931         const struct wined3d_gl_info *gl_info;
932         struct wined3d_context *context;
933
934         TRACE("Freeing PBO memory.\n");
935
936         context = context_acquire(device, NULL);
937         gl_info = context->gl_info;
938
939         ENTER_GL();
940         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
941         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
942         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
943         checkGLcall("glUnmapBufferARB");
944         LEAVE_GL();
945         context_release(context);
946
947         surface->resource.allocatedMemory = NULL;
948     }
949
950     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
951
952     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
953     {
954         TRACE("Not dirtified, nothing to do.\n");
955         goto done;
956     }
957
958     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
959             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
960     {
961         if (!surface->dirtyRect.left && !surface->dirtyRect.top
962                 && surface->dirtyRect.right == surface->resource.width
963                 && surface->dirtyRect.bottom == surface->resource.height)
964         {
965             fullsurface = TRUE;
966         }
967         else
968         {
969             /* TODO: Proper partial rectangle tracking. */
970             fullsurface = FALSE;
971             surface->flags |= SFLAG_INSYSMEM;
972         }
973
974         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
975
976         /* Partial rectangle tracking is not commonly implemented, it is only
977          * done for render targets. INSYSMEM was set before to tell
978          * surface_load_location() where to read the rectangle from.
979          * Indrawable is set because all modifications from the partial
980          * sysmem copy are written back to the drawable, thus the surface is
981          * merged again in the drawable. The sysmem copy is not fully up to
982          * date because only a subrectangle was read in Map(). */
983         if (!fullsurface)
984         {
985             surface_modify_location(surface, surface->draw_binding, TRUE);
986             surface_evict_sysmem(surface);
987         }
988
989         surface->dirtyRect.left = surface->resource.width;
990         surface->dirtyRect.top = surface->resource.height;
991         surface->dirtyRect.right = 0;
992         surface->dirtyRect.bottom = 0;
993     }
994     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
995     {
996         FIXME("Depth / stencil buffer locking is not implemented.\n");
997     }
998
999 done:
1000     /* Overlays have to be redrawn manually after changes with the GL implementation */
1001     if (surface->overlay_dest)
1002         surface_draw_overlay(surface);
1003 }
1004
1005 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1006 {
1007     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1008         return FALSE;
1009     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1010         return FALSE;
1011     return TRUE;
1012 }
1013
1014 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1015         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1016 {
1017     const struct wined3d_gl_info *gl_info;
1018     struct wined3d_context *context;
1019     DWORD src_mask, dst_mask;
1020     GLbitfield gl_mask;
1021
1022     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1023             device, src_surface, wine_dbgstr_rect(src_rect),
1024             dst_surface, wine_dbgstr_rect(dst_rect));
1025
1026     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1027     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1028
1029     if (src_mask != dst_mask)
1030     {
1031         ERR("Incompatible formats %s and %s.\n",
1032                 debug_d3dformat(src_surface->resource.format->id),
1033                 debug_d3dformat(dst_surface->resource.format->id));
1034         return;
1035     }
1036
1037     if (!src_mask)
1038     {
1039         ERR("Not a depth / stencil format: %s.\n",
1040                 debug_d3dformat(src_surface->resource.format->id));
1041         return;
1042     }
1043
1044     gl_mask = 0;
1045     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1046         gl_mask |= GL_DEPTH_BUFFER_BIT;
1047     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1048         gl_mask |= GL_STENCIL_BUFFER_BIT;
1049
1050     /* Make sure the locations are up-to-date. Loading the destination
1051      * surface isn't required if the entire surface is overwritten. */
1052     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1053     if (!surface_is_full_rect(dst_surface, dst_rect))
1054         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1055
1056     context = context_acquire(device, NULL);
1057     if (!context->valid)
1058     {
1059         context_release(context);
1060         WARN("Invalid context, skipping blit.\n");
1061         return;
1062     }
1063
1064     gl_info = context->gl_info;
1065
1066     ENTER_GL();
1067
1068     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1069     glReadBuffer(GL_NONE);
1070     checkGLcall("glReadBuffer()");
1071     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1072
1073     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1074     context_set_draw_buffer(context, GL_NONE);
1075     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1076
1077     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1078     {
1079         glDepthMask(GL_TRUE);
1080         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1081     }
1082     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1083     {
1084         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1085         {
1086             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1087             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1088         }
1089         glStencilMask(~0U);
1090         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1091     }
1092
1093     glDisable(GL_SCISSOR_TEST);
1094     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1095
1096     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1097             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1098     checkGLcall("glBlitFramebuffer()");
1099
1100     LEAVE_GL();
1101
1102     if (wined3d_settings.strict_draw_ordering)
1103         wglFlush(); /* Flush to ensure ordering across contexts. */
1104
1105     context_release(context);
1106 }
1107
1108 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1109  * Depth / stencil is not supported. */
1110 static void surface_blt_fbo(const struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1111         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1112         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1113 {
1114     const struct wined3d_gl_info *gl_info;
1115     struct wined3d_context *context;
1116     RECT src_rect, dst_rect;
1117     GLenum gl_filter;
1118     GLenum buffer;
1119
1120     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1121     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1122             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1123     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1124             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1125
1126     src_rect = *src_rect_in;
1127     dst_rect = *dst_rect_in;
1128
1129     switch (filter)
1130     {
1131         case WINED3DTEXF_LINEAR:
1132             gl_filter = GL_LINEAR;
1133             break;
1134
1135         default:
1136             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1137         case WINED3DTEXF_NONE:
1138         case WINED3DTEXF_POINT:
1139             gl_filter = GL_NEAREST;
1140             break;
1141     }
1142
1143     /* Resolve the source surface first if needed. */
1144     if (src_location == SFLAG_INRB_MULTISAMPLE
1145             && (src_surface->resource.format->id != dst_surface->resource.format->id
1146                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1147                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1148         src_location = SFLAG_INRB_RESOLVED;
1149
1150     /* Make sure the locations are up-to-date. Loading the destination
1151      * surface isn't required if the entire surface is overwritten. (And is
1152      * in fact harmful if we're being called by surface_load_location() with
1153      * the purpose of loading the destination surface.) */
1154     surface_load_location(src_surface, src_location, NULL);
1155     if (!surface_is_full_rect(dst_surface, &dst_rect))
1156         surface_load_location(dst_surface, dst_location, NULL);
1157
1158     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1159     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1160     else context = context_acquire(device, NULL);
1161
1162     if (!context->valid)
1163     {
1164         context_release(context);
1165         WARN("Invalid context, skipping blit.\n");
1166         return;
1167     }
1168
1169     gl_info = context->gl_info;
1170
1171     if (src_location == SFLAG_INDRAWABLE)
1172     {
1173         TRACE("Source surface %p is onscreen.\n", src_surface);
1174         buffer = surface_get_gl_buffer(src_surface);
1175         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1176     }
1177     else
1178     {
1179         TRACE("Source surface %p is offscreen.\n", src_surface);
1180         buffer = GL_COLOR_ATTACHMENT0;
1181     }
1182
1183     ENTER_GL();
1184     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1185     glReadBuffer(buffer);
1186     checkGLcall("glReadBuffer()");
1187     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1188     LEAVE_GL();
1189
1190     if (dst_location == SFLAG_INDRAWABLE)
1191     {
1192         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1193         buffer = surface_get_gl_buffer(dst_surface);
1194         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1195     }
1196     else
1197     {
1198         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1199         buffer = GL_COLOR_ATTACHMENT0;
1200     }
1201
1202     ENTER_GL();
1203     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1204     context_set_draw_buffer(context, buffer);
1205     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1206     context_invalidate_state(context, STATE_FRAMEBUFFER);
1207
1208     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1209     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1210     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1211     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1212     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1213
1214     glDisable(GL_SCISSOR_TEST);
1215     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1216
1217     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1218             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1219     checkGLcall("glBlitFramebuffer()");
1220
1221     LEAVE_GL();
1222
1223     if (wined3d_settings.strict_draw_ordering
1224             || (dst_location == SFLAG_INDRAWABLE
1225             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1226         wglFlush();
1227
1228     context_release(context);
1229 }
1230
1231 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1232         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1233         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1234 {
1235     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1236         return FALSE;
1237
1238     /* Source and/or destination need to be on the GL side */
1239     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1240         return FALSE;
1241
1242     switch (blit_op)
1243     {
1244         case WINED3D_BLIT_OP_COLOR_BLIT:
1245             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1246                 return FALSE;
1247             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1248                 return FALSE;
1249             break;
1250
1251         case WINED3D_BLIT_OP_DEPTH_BLIT:
1252             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1253                 return FALSE;
1254             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1255                 return FALSE;
1256             break;
1257
1258         default:
1259             return FALSE;
1260     }
1261
1262     if (!(src_format->id == dst_format->id
1263             || (is_identity_fixup(src_format->color_fixup)
1264             && is_identity_fixup(dst_format->color_fixup))))
1265         return FALSE;
1266
1267     return TRUE;
1268 }
1269
1270 /* This function checks if the primary render target uses the 8bit paletted format. */
1271 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1272 {
1273     if (device->fb.render_targets && device->fb.render_targets[0])
1274     {
1275         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1276         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1277                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1278             return TRUE;
1279     }
1280     return FALSE;
1281 }
1282
1283 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1284         DWORD color, struct wined3d_color *float_color)
1285 {
1286     const struct wined3d_format *format = surface->resource.format;
1287     const struct wined3d_device *device = surface->resource.device;
1288
1289     switch (format->id)
1290     {
1291         case WINED3DFMT_P8_UINT:
1292             if (surface->palette)
1293             {
1294                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1295                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1296                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1297             }
1298             else
1299             {
1300                 float_color->r = 0.0f;
1301                 float_color->g = 0.0f;
1302                 float_color->b = 0.0f;
1303             }
1304             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1305             break;
1306
1307         case WINED3DFMT_B5G6R5_UNORM:
1308             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1309             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1310             float_color->b = (color & 0x1f) / 31.0f;
1311             float_color->a = 1.0f;
1312             break;
1313
1314         case WINED3DFMT_B8G8R8_UNORM:
1315         case WINED3DFMT_B8G8R8X8_UNORM:
1316             float_color->r = D3DCOLOR_R(color);
1317             float_color->g = D3DCOLOR_G(color);
1318             float_color->b = D3DCOLOR_B(color);
1319             float_color->a = 1.0f;
1320             break;
1321
1322         case WINED3DFMT_B8G8R8A8_UNORM:
1323             float_color->r = D3DCOLOR_R(color);
1324             float_color->g = D3DCOLOR_G(color);
1325             float_color->b = D3DCOLOR_B(color);
1326             float_color->a = D3DCOLOR_A(color);
1327             break;
1328
1329         default:
1330             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1331             return FALSE;
1332     }
1333
1334     return TRUE;
1335 }
1336
1337 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1338 {
1339     const struct wined3d_format *format = surface->resource.format;
1340
1341     switch (format->id)
1342     {
1343         case WINED3DFMT_S1_UINT_D15_UNORM:
1344             *float_depth = depth / (float)0x00007fff;
1345             break;
1346
1347         case WINED3DFMT_D16_UNORM:
1348             *float_depth = depth / (float)0x0000ffff;
1349             break;
1350
1351         case WINED3DFMT_D24_UNORM_S8_UINT:
1352         case WINED3DFMT_X8D24_UNORM:
1353             *float_depth = depth / (float)0x00ffffff;
1354             break;
1355
1356         case WINED3DFMT_D32_UNORM:
1357             *float_depth = depth / (float)0xffffffff;
1358             break;
1359
1360         default:
1361             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1362             return FALSE;
1363     }
1364
1365     return TRUE;
1366 }
1367
1368 /* Do not call while under the GL lock. */
1369 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1370 {
1371     const struct wined3d_resource *resource = &surface->resource;
1372     struct wined3d_device *device = resource->device;
1373     const struct blit_shader *blitter;
1374
1375     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1376             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1377     if (!blitter)
1378     {
1379         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1380         return WINED3DERR_INVALIDCALL;
1381     }
1382
1383     return blitter->depth_fill(device, surface, rect, depth);
1384 }
1385
1386 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1387         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1388 {
1389     struct wined3d_device *device = src_surface->resource.device;
1390
1391     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1392             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1393             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1394         return WINED3DERR_INVALIDCALL;
1395
1396     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1397
1398     surface_modify_ds_location(dst_surface, SFLAG_INTEXTURE,
1399             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1400
1401     return WINED3D_OK;
1402 }
1403
1404 /* Do not call while under the GL lock. */
1405 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1406         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1407         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1408 {
1409     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1410     struct wined3d_device *device = dst_surface->resource.device;
1411     DWORD src_ds_flags, dst_ds_flags;
1412     RECT src_rect, dst_rect;
1413     BOOL scale, convert;
1414
1415     static const DWORD simple_blit = WINEDDBLT_ASYNC
1416             | WINEDDBLT_COLORFILL
1417             | WINEDDBLT_WAIT
1418             | WINEDDBLT_DEPTHFILL
1419             | WINEDDBLT_DONOTWAIT;
1420
1421     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1422             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1423             flags, fx, debug_d3dtexturefiltertype(filter));
1424     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1425
1426     if (fx)
1427     {
1428         TRACE("dwSize %#x.\n", fx->dwSize);
1429         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1430         TRACE("dwROP %#x.\n", fx->dwROP);
1431         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1432         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1433         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1434         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1435         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1436         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1437         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1438         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1439         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1440         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1441         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1442         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1443         TRACE("dwReserved %#x.\n", fx->dwReserved);
1444         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1445         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1446         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1447         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1448         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1449         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1450                 fx->ddckDestColorkey.color_space_low_value,
1451                 fx->ddckDestColorkey.color_space_high_value);
1452         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1453                 fx->ddckSrcColorkey.color_space_low_value,
1454                 fx->ddckSrcColorkey.color_space_high_value);
1455     }
1456
1457     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1458     {
1459         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1460         return WINEDDERR_SURFACEBUSY;
1461     }
1462
1463     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1464
1465     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1466             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1467             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1468             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1469             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1470     {
1471         WARN("The application gave us a bad destination rectangle.\n");
1472         return WINEDDERR_INVALIDRECT;
1473     }
1474
1475     if (src_surface)
1476     {
1477         surface_get_rect(src_surface, src_rect_in, &src_rect);
1478
1479         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1480                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1481                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1482                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1483                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1484         {
1485             WARN("Application gave us bad source rectangle for Blt.\n");
1486             return WINEDDERR_INVALIDRECT;
1487         }
1488     }
1489     else
1490     {
1491         memset(&src_rect, 0, sizeof(src_rect));
1492     }
1493
1494     if (!fx || !(fx->dwDDFX))
1495         flags &= ~WINEDDBLT_DDFX;
1496
1497     if (flags & WINEDDBLT_WAIT)
1498         flags &= ~WINEDDBLT_WAIT;
1499
1500     if (flags & WINEDDBLT_ASYNC)
1501     {
1502         static unsigned int once;
1503
1504         if (!once++)
1505             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1506         flags &= ~WINEDDBLT_ASYNC;
1507     }
1508
1509     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1510     if (flags & WINEDDBLT_DONOTWAIT)
1511     {
1512         static unsigned int once;
1513
1514         if (!once++)
1515             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1516         flags &= ~WINEDDBLT_DONOTWAIT;
1517     }
1518
1519     if (!device->d3d_initialized)
1520     {
1521         WARN("D3D not initialized, using fallback.\n");
1522         goto cpu;
1523     }
1524
1525     /* We want to avoid invalidating the sysmem location for converted
1526      * surfaces, since otherwise we'd have to convert the data back when
1527      * locking them. */
1528     if (dst_surface->flags & SFLAG_CONVERTED)
1529     {
1530         WARN("Converted surface, using CPU blit.\n");
1531         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1532     }
1533
1534     if (flags & ~simple_blit)
1535     {
1536         WARN("Using fallback for complex blit (%#x).\n", flags);
1537         goto fallback;
1538     }
1539
1540     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1541         src_swapchain = src_surface->container.u.swapchain;
1542     else
1543         src_swapchain = NULL;
1544
1545     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1546         dst_swapchain = dst_surface->container.u.swapchain;
1547     else
1548         dst_swapchain = NULL;
1549
1550     /* This isn't strictly needed. FBO blits for example could deal with
1551      * cross-swapchain blits by first downloading the source to a texture
1552      * before switching to the destination context. We just have this here to
1553      * not have to deal with the issue, since cross-swapchain blits should be
1554      * rare. */
1555     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1556     {
1557         FIXME("Using fallback for cross-swapchain blit.\n");
1558         goto fallback;
1559     }
1560
1561     scale = src_surface
1562             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1563             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1564     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1565
1566     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1567     if (src_surface)
1568         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1569     else
1570         src_ds_flags = 0;
1571
1572     if (src_ds_flags || dst_ds_flags)
1573     {
1574         if (flags & WINEDDBLT_DEPTHFILL)
1575         {
1576             float depth;
1577
1578             TRACE("Depth fill.\n");
1579
1580             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1581                 return WINED3DERR_INVALIDCALL;
1582
1583             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1584                 return WINED3D_OK;
1585         }
1586         else
1587         {
1588             /* Accessing depth / stencil surfaces is supposed to fail while in
1589              * a scene, except for fills, which seem to work. */
1590             if (device->inScene)
1591             {
1592                 WARN("Rejecting depth / stencil access while in scene.\n");
1593                 return WINED3DERR_INVALIDCALL;
1594             }
1595
1596             if (src_ds_flags != dst_ds_flags)
1597             {
1598                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1599                 return WINED3DERR_INVALIDCALL;
1600             }
1601
1602             if (src_rect.top || src_rect.left
1603                     || src_rect.bottom != src_surface->resource.height
1604                     || src_rect.right != src_surface->resource.width)
1605             {
1606                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1607                         wine_dbgstr_rect(&src_rect));
1608                 return WINED3DERR_INVALIDCALL;
1609             }
1610
1611             if (dst_rect.top || dst_rect.left
1612                     || dst_rect.bottom != dst_surface->resource.height
1613                     || dst_rect.right != dst_surface->resource.width)
1614             {
1615                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1616                         wine_dbgstr_rect(&src_rect));
1617                 return WINED3DERR_INVALIDCALL;
1618             }
1619
1620             if (scale)
1621             {
1622                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1623                 return WINED3DERR_INVALIDCALL;
1624             }
1625
1626             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1627                 return WINED3D_OK;
1628         }
1629     }
1630     else
1631     {
1632         /* In principle this would apply to depth blits as well, but we don't
1633          * implement those in the CPU blitter at the moment. */
1634         if ((dst_surface->flags & SFLAG_INSYSMEM)
1635                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1636         {
1637             if (scale)
1638                 TRACE("Not doing sysmem blit because of scaling.\n");
1639             else if (convert)
1640                 TRACE("Not doing sysmem blit because of format conversion.\n");
1641             else
1642                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1643         }
1644
1645         if (flags & WINEDDBLT_COLORFILL)
1646         {
1647             struct wined3d_color color;
1648
1649             TRACE("Color fill.\n");
1650
1651             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1652                 goto fallback;
1653
1654             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1655                 return WINED3D_OK;
1656         }
1657         else
1658         {
1659             TRACE("Color blit.\n");
1660
1661             /* Upload */
1662             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1663             {
1664                 if (scale)
1665                     TRACE("Not doing upload because of scaling.\n");
1666                 else if (convert)
1667                     TRACE("Not doing upload because of format conversion.\n");
1668                 else
1669                 {
1670                     POINT dst_point = {dst_rect.left, dst_rect.top};
1671
1672                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1673                     {
1674                         if (!surface_is_offscreen(dst_surface))
1675                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1676                         return WINED3D_OK;
1677                     }
1678                 }
1679             }
1680
1681             /* Use present for back -> front blits. The idea behind this is
1682              * that present is potentially faster than a blit, in particular
1683              * when FBO blits aren't available. Some ddraw applications like
1684              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1685              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1686              * applications can't blit directly to the frontbuffer. */
1687             if (dst_swapchain && dst_swapchain->back_buffers
1688                     && dst_surface == dst_swapchain->front_buffer
1689                     && src_surface == dst_swapchain->back_buffers[0])
1690             {
1691                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1692
1693                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1694
1695                 /* Set the swap effect to COPY, we don't want the backbuffer
1696                  * to become undefined. */
1697                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1698                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1699                 dst_swapchain->desc.swap_effect = swap_effect;
1700
1701                 return WINED3D_OK;
1702             }
1703
1704             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1705                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1706                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1707             {
1708                 TRACE("Using FBO blit.\n");
1709
1710                 surface_blt_fbo(device, filter,
1711                         src_surface, src_surface->draw_binding, &src_rect,
1712                         dst_surface, dst_surface->draw_binding, &dst_rect);
1713                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1714                 return WINED3D_OK;
1715             }
1716
1717             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1718                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1719                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1720             {
1721                 TRACE("Using arbfp blit.\n");
1722
1723                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1724                     return WINED3D_OK;
1725             }
1726         }
1727     }
1728
1729 fallback:
1730
1731     /* Special cases for render targets. */
1732     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1733             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1734     {
1735         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1736                 src_surface, &src_rect, flags, fx, filter)))
1737             return WINED3D_OK;
1738     }
1739
1740 cpu:
1741
1742     /* For the rest call the X11 surface implementation. For render targets
1743      * this should be implemented OpenGL accelerated in BltOverride, other
1744      * blits are rather rare. */
1745     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1746 }
1747
1748 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1749         struct wined3d_surface *render_target)
1750 {
1751     TRACE("surface %p, render_target %p.\n", surface, render_target);
1752
1753     /* TODO: Check surface sizes, pools, etc. */
1754
1755     if (render_target->resource.multisample_type)
1756         return WINED3DERR_INVALIDCALL;
1757
1758     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3DTEXF_POINT);
1759 }
1760
1761 /* Context activation is done by the caller. */
1762 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1763 {
1764     if (surface->flags & SFLAG_DIBSECTION)
1765     {
1766         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1767     }
1768     else
1769     {
1770         if (!surface->resource.heapMemory)
1771             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1772         else if (!(surface->flags & SFLAG_CLIENT))
1773             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1774                     surface, surface->resource.heapMemory, surface->flags);
1775
1776         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1777                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1778     }
1779
1780     ENTER_GL();
1781     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1782     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1783     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1784             surface->resource.size, surface->resource.allocatedMemory));
1785     checkGLcall("glGetBufferSubDataARB");
1786     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1787     checkGLcall("glDeleteBuffersARB");
1788     LEAVE_GL();
1789
1790     surface->pbo = 0;
1791     surface->flags &= ~SFLAG_PBO;
1792 }
1793
1794 /* Do not call while under the GL lock. */
1795 static void surface_unload(struct wined3d_resource *resource)
1796 {
1797     struct wined3d_surface *surface = surface_from_resource(resource);
1798     struct wined3d_renderbuffer_entry *entry, *entry2;
1799     struct wined3d_device *device = resource->device;
1800     const struct wined3d_gl_info *gl_info;
1801     struct wined3d_context *context;
1802
1803     TRACE("surface %p.\n", surface);
1804
1805     if (resource->pool == WINED3DPOOL_DEFAULT)
1806     {
1807         /* Default pool resources are supposed to be destroyed before Reset is called.
1808          * Implicit resources stay however. So this means we have an implicit render target
1809          * or depth stencil. The content may be destroyed, but we still have to tear down
1810          * opengl resources, so we cannot leave early.
1811          *
1812          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1813          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1814          * or the depth stencil into an FBO the texture or render buffer will be removed
1815          * and all flags get lost
1816          */
1817         if (!(surface->flags & SFLAG_PBO))
1818             surface_init_sysmem(surface);
1819         /* We also get here when the ddraw swapchain is destroyed, for example
1820          * for a mode switch. In this case this surface won't necessarily be
1821          * an implicit surface. We have to mark it lost so that the
1822          * application can restore it after the mode switch. */
1823         surface->flags |= SFLAG_LOST;
1824     }
1825     else
1826     {
1827         /* Load the surface into system memory */
1828         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1829         surface_modify_location(surface, surface->draw_binding, FALSE);
1830     }
1831     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1832     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1833     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1834
1835     context = context_acquire(device, NULL);
1836     gl_info = context->gl_info;
1837
1838     /* Destroy PBOs, but load them into real sysmem before */
1839     if (surface->flags & SFLAG_PBO)
1840         surface_remove_pbo(surface, gl_info);
1841
1842     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1843      * all application-created targets the application has to release the surface
1844      * before calling _Reset
1845      */
1846     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1847     {
1848         ENTER_GL();
1849         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1850         LEAVE_GL();
1851         list_remove(&entry->entry);
1852         HeapFree(GetProcessHeap(), 0, entry);
1853     }
1854     list_init(&surface->renderbuffers);
1855     surface->current_renderbuffer = NULL;
1856
1857     ENTER_GL();
1858
1859     /* If we're in a texture, the texture name belongs to the texture.
1860      * Otherwise, destroy it. */
1861     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1862     {
1863         glDeleteTextures(1, &surface->texture_name);
1864         surface->texture_name = 0;
1865         glDeleteTextures(1, &surface->texture_name_srgb);
1866         surface->texture_name_srgb = 0;
1867     }
1868     if (surface->rb_multisample)
1869     {
1870         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1871         surface->rb_multisample = 0;
1872     }
1873     if (surface->rb_resolved)
1874     {
1875         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1876         surface->rb_resolved = 0;
1877     }
1878
1879     LEAVE_GL();
1880
1881     context_release(context);
1882
1883     resource_unload(resource);
1884 }
1885
1886 static const struct wined3d_resource_ops surface_resource_ops =
1887 {
1888     surface_unload,
1889 };
1890
1891 static const struct wined3d_surface_ops surface_ops =
1892 {
1893     surface_private_setup,
1894     surface_realize_palette,
1895     surface_map,
1896     surface_unmap,
1897 };
1898
1899 /*****************************************************************************
1900  * Initializes the GDI surface, aka creates the DIB section we render to
1901  * The DIB section creation is done by calling GetDC, which will create the
1902  * section and releasing the dc to allow the app to use it. The dib section
1903  * will stay until the surface is released
1904  *
1905  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1906  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1907  * avoid confusion in the shared surface code.
1908  *
1909  * Returns:
1910  *  WINED3D_OK on success
1911  *  The return values of called methods on failure
1912  *
1913  *****************************************************************************/
1914 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1915 {
1916     HRESULT hr;
1917
1918     TRACE("surface %p.\n", surface);
1919
1920     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1921     {
1922         ERR("Overlays not yet supported by GDI surfaces.\n");
1923         return WINED3DERR_INVALIDCALL;
1924     }
1925
1926     /* Sysmem textures have memory already allocated - release it,
1927      * this avoids an unnecessary memcpy. */
1928     hr = surface_create_dib_section(surface);
1929     if (SUCCEEDED(hr))
1930     {
1931         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1932         surface->resource.heapMemory = NULL;
1933         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1934     }
1935
1936     /* We don't mind the nonpow2 stuff in GDI. */
1937     surface->pow2Width = surface->resource.width;
1938     surface->pow2Height = surface->resource.height;
1939
1940     return WINED3D_OK;
1941 }
1942
1943 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1944 {
1945     struct wined3d_palette *palette = surface->palette;
1946
1947     TRACE("surface %p.\n", surface);
1948
1949     if (!palette) return;
1950
1951     if (surface->flags & SFLAG_DIBSECTION)
1952     {
1953         RGBQUAD col[256];
1954         unsigned int i;
1955
1956         TRACE("Updating the DC's palette.\n");
1957
1958         for (i = 0; i < 256; ++i)
1959         {
1960             col[i].rgbRed = palette->palents[i].peRed;
1961             col[i].rgbGreen = palette->palents[i].peGreen;
1962             col[i].rgbBlue = palette->palents[i].peBlue;
1963             col[i].rgbReserved = 0;
1964         }
1965         SetDIBColorTable(surface->hDC, 0, 256, col);
1966     }
1967
1968     /* Update the image because of the palette change. Some games like e.g.
1969      * Red Alert call SetEntries a lot to implement fading. */
1970     /* Tell the swapchain to update the screen. */
1971     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1972     {
1973         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1974         if (surface == swapchain->front_buffer)
1975         {
1976             x11_copy_to_screen(swapchain, NULL);
1977         }
1978     }
1979 }
1980
1981 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1982 {
1983     TRACE("surface %p, rect %s, flags %#x.\n",
1984             surface, wine_dbgstr_rect(rect), flags);
1985
1986     if (!(surface->flags & SFLAG_DIBSECTION))
1987     {
1988         /* This happens on gdi surfaces if the application set a user pointer
1989          * and resets it. Recreate the DIB section. */
1990         surface_create_dib_section(surface);
1991         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1992     }
1993 }
1994
1995 static void gdi_surface_unmap(struct wined3d_surface *surface)
1996 {
1997     TRACE("surface %p.\n", surface);
1998
1999     /* Tell the swapchain to update the screen. */
2000     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2001     {
2002         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2003         if (surface == swapchain->front_buffer)
2004         {
2005             x11_copy_to_screen(swapchain, &surface->lockedRect);
2006         }
2007     }
2008
2009     memset(&surface->lockedRect, 0, sizeof(RECT));
2010 }
2011
2012 static const struct wined3d_surface_ops gdi_surface_ops =
2013 {
2014     gdi_surface_private_setup,
2015     gdi_surface_realize_palette,
2016     gdi_surface_map,
2017     gdi_surface_unmap,
2018 };
2019
2020 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2021 {
2022     GLuint *name;
2023     DWORD flag;
2024
2025     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2026
2027     if(srgb)
2028     {
2029         name = &surface->texture_name_srgb;
2030         flag = SFLAG_INSRGBTEX;
2031     }
2032     else
2033     {
2034         name = &surface->texture_name;
2035         flag = SFLAG_INTEXTURE;
2036     }
2037
2038     if (!*name && new_name)
2039     {
2040         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2041          * surface has no texture name yet. See if we can get rid of this. */
2042         if (surface->flags & flag)
2043         {
2044             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2045             surface_modify_location(surface, flag, FALSE);
2046         }
2047     }
2048
2049     *name = new_name;
2050     surface_force_reload(surface);
2051 }
2052
2053 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2054 {
2055     TRACE("surface %p, target %#x.\n", surface, target);
2056
2057     if (surface->texture_target != target)
2058     {
2059         if (target == GL_TEXTURE_RECTANGLE_ARB)
2060         {
2061             surface->flags &= ~SFLAG_NORMCOORD;
2062         }
2063         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2064         {
2065             surface->flags |= SFLAG_NORMCOORD;
2066         }
2067     }
2068     surface->texture_target = target;
2069     surface_force_reload(surface);
2070 }
2071
2072 /* Context activation is done by the caller. */
2073 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2074 {
2075     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2076
2077     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2078     {
2079         struct wined3d_texture *texture = surface->container.u.texture;
2080
2081         TRACE("Passing to container (%p).\n", texture);
2082         texture->texture_ops->texture_bind(texture, context, srgb);
2083     }
2084     else
2085     {
2086         if (surface->texture_level)
2087         {
2088             ERR("Standalone surface %p is non-zero texture level %u.\n",
2089                     surface, surface->texture_level);
2090         }
2091
2092         if (srgb)
2093             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2094
2095         ENTER_GL();
2096
2097         if (!surface->texture_name)
2098         {
2099             glGenTextures(1, &surface->texture_name);
2100             checkGLcall("glGenTextures");
2101
2102             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2103
2104             context_bind_texture(context, surface->texture_target, surface->texture_name);
2105             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2106             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2107             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2108             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2109             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2110             checkGLcall("glTexParameteri");
2111         }
2112         else
2113         {
2114             context_bind_texture(context, surface->texture_target, surface->texture_name);
2115         }
2116
2117         LEAVE_GL();
2118     }
2119 }
2120
2121 /* This call just downloads data, the caller is responsible for binding the
2122  * correct texture. */
2123 /* Context activation is done by the caller. */
2124 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2125 {
2126     const struct wined3d_format *format = surface->resource.format;
2127
2128     /* Only support read back of converted P8 surfaces. */
2129     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2130     {
2131         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2132         return;
2133     }
2134
2135     ENTER_GL();
2136
2137     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2138     {
2139         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2140                 surface, surface->texture_level, format->glFormat, format->glType,
2141                 surface->resource.allocatedMemory);
2142
2143         if (surface->flags & SFLAG_PBO)
2144         {
2145             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2146             checkGLcall("glBindBufferARB");
2147             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2148             checkGLcall("glGetCompressedTexImageARB");
2149             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2150             checkGLcall("glBindBufferARB");
2151         }
2152         else
2153         {
2154             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2155                     surface->texture_level, surface->resource.allocatedMemory));
2156             checkGLcall("glGetCompressedTexImageARB");
2157         }
2158
2159         LEAVE_GL();
2160     }
2161     else
2162     {
2163         void *mem;
2164         GLenum gl_format = format->glFormat;
2165         GLenum gl_type = format->glType;
2166         int src_pitch = 0;
2167         int dst_pitch = 0;
2168
2169         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2170         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2171         {
2172             gl_format = GL_ALPHA;
2173             gl_type = GL_UNSIGNED_BYTE;
2174         }
2175
2176         if (surface->flags & SFLAG_NONPOW2)
2177         {
2178             unsigned char alignment = surface->resource.device->surface_alignment;
2179             src_pitch = format->byte_count * surface->pow2Width;
2180             dst_pitch = wined3d_surface_get_pitch(surface);
2181             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2182             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2183         }
2184         else
2185         {
2186             mem = surface->resource.allocatedMemory;
2187         }
2188
2189         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2190                 surface, surface->texture_level, gl_format, gl_type, mem);
2191
2192         if (surface->flags & SFLAG_PBO)
2193         {
2194             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2195             checkGLcall("glBindBufferARB");
2196
2197             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2198             checkGLcall("glGetTexImage");
2199
2200             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2201             checkGLcall("glBindBufferARB");
2202         }
2203         else
2204         {
2205             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2206             checkGLcall("glGetTexImage");
2207         }
2208         LEAVE_GL();
2209
2210         if (surface->flags & SFLAG_NONPOW2)
2211         {
2212             const BYTE *src_data;
2213             BYTE *dst_data;
2214             UINT y;
2215             /*
2216              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2217              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2218              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2219              *
2220              * We're doing this...
2221              *
2222              * instead of boxing the texture :
2223              * |<-texture width ->|  -->pow2width|   /\
2224              * |111111111111111111|              |   |
2225              * |222 Texture 222222| boxed empty  | texture height
2226              * |3333 Data 33333333|              |   |
2227              * |444444444444444444|              |   \/
2228              * -----------------------------------   |
2229              * |     boxed  empty | boxed empty  | pow2height
2230              * |                  |              |   \/
2231              * -----------------------------------
2232              *
2233              *
2234              * we're repacking the data to the expected texture width
2235              *
2236              * |<-texture width ->|  -->pow2width|   /\
2237              * |111111111111111111222222222222222|   |
2238              * |222333333333333333333444444444444| texture height
2239              * |444444                           |   |
2240              * |                                 |   \/
2241              * |                                 |   |
2242              * |            empty                | pow2height
2243              * |                                 |   \/
2244              * -----------------------------------
2245              *
2246              * == is the same as
2247              *
2248              * |<-texture width ->|    /\
2249              * |111111111111111111|
2250              * |222222222222222222|texture height
2251              * |333333333333333333|
2252              * |444444444444444444|    \/
2253              * --------------------
2254              *
2255              * this also means that any references to allocatedMemory should work with the data as if were a
2256              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2257              *
2258              * internally the texture is still stored in a boxed format so any references to textureName will
2259              * get a boxed texture with width pow2width and not a texture of width resource.width.
2260              *
2261              * Performance should not be an issue, because applications normally do not lock the surfaces when
2262              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2263              * and doesn't have to be re-read. */
2264             src_data = mem;
2265             dst_data = surface->resource.allocatedMemory;
2266             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2267             for (y = 1; y < surface->resource.height; ++y)
2268             {
2269                 /* skip the first row */
2270                 src_data += src_pitch;
2271                 dst_data += dst_pitch;
2272                 memcpy(dst_data, src_data, dst_pitch);
2273             }
2274
2275             HeapFree(GetProcessHeap(), 0, mem);
2276         }
2277     }
2278
2279     /* Surface has now been downloaded */
2280     surface->flags |= SFLAG_INSYSMEM;
2281 }
2282
2283 /* This call just uploads data, the caller is responsible for binding the
2284  * correct texture. */
2285 /* Context activation is done by the caller. */
2286 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2287         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2288         BOOL srgb, const struct wined3d_bo_address *data)
2289 {
2290     UINT update_w = src_rect->right - src_rect->left;
2291     UINT update_h = src_rect->bottom - src_rect->top;
2292
2293     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2294             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2295             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2296
2297     if (surface->flags & SFLAG_LOCKED)
2298     {
2299         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2300         surface->flags |= SFLAG_PIN_SYSMEM;
2301     }
2302
2303     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2304         update_h *= format->heightscale;
2305
2306     ENTER_GL();
2307
2308     if (data->buffer_object)
2309     {
2310         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2311         checkGLcall("glBindBufferARB");
2312     }
2313
2314     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2315     {
2316         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2317         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2318         const BYTE *addr = data->addr;
2319         GLenum internal;
2320
2321         addr += (src_rect->top / format->block_height) * src_pitch;
2322         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2323
2324         if (srgb)
2325             internal = format->glGammaInternal;
2326         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2327             internal = format->rtInternal;
2328         else
2329             internal = format->glInternal;
2330
2331         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2332                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2333                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2334
2335         if (row_length == src_pitch)
2336         {
2337             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2338                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2339         }
2340         else
2341         {
2342             UINT row, y;
2343
2344             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2345              * can't use the unpack row length like below. */
2346             for (row = 0, y = dst_point->y; row < row_count; ++row)
2347             {
2348                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2349                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2350                 y += format->block_height;
2351                 addr += src_pitch;
2352             }
2353         }
2354         checkGLcall("glCompressedTexSubImage2DARB");
2355     }
2356     else
2357     {
2358         const BYTE *addr = data->addr;
2359
2360         addr += src_rect->top * src_pitch;
2361         addr += src_rect->left * format->byte_count;
2362
2363         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2364                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2365                 update_w, update_h, format->glFormat, format->glType, addr);
2366
2367         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2368         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2369                 update_w, update_h, format->glFormat, format->glType, addr);
2370         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2371         checkGLcall("glTexSubImage2D");
2372     }
2373
2374     if (data->buffer_object)
2375     {
2376         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2377         checkGLcall("glBindBufferARB");
2378     }
2379
2380     LEAVE_GL();
2381
2382     if (wined3d_settings.strict_draw_ordering)
2383         wglFlush();
2384
2385     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2386     {
2387         struct wined3d_device *device = surface->resource.device;
2388         unsigned int i;
2389
2390         for (i = 0; i < device->context_count; ++i)
2391         {
2392             context_surface_update(device->contexts[i], surface);
2393         }
2394     }
2395 }
2396
2397 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2398         struct wined3d_surface *src_surface, const RECT *src_rect)
2399 {
2400     const struct wined3d_format *src_format;
2401     const struct wined3d_format *dst_format;
2402     const struct wined3d_gl_info *gl_info;
2403     struct wined3d_context *context;
2404     struct wined3d_bo_address data;
2405     struct wined3d_format format;
2406     UINT update_w, update_h;
2407     CONVERT_TYPES convert;
2408     UINT dst_w, dst_h;
2409     UINT src_w, src_h;
2410     UINT src_pitch;
2411     POINT p;
2412     RECT r;
2413
2414     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2415             dst_surface, wine_dbgstr_point(dst_point),
2416             src_surface, wine_dbgstr_rect(src_rect));
2417
2418     src_format = src_surface->resource.format;
2419     dst_format = dst_surface->resource.format;
2420
2421     if (src_format->id != dst_format->id)
2422     {
2423         WARN("Source and destination surfaces should have the same format.\n");
2424         return WINED3DERR_INVALIDCALL;
2425     }
2426
2427     if (!dst_point)
2428     {
2429         p.x = 0;
2430         p.y = 0;
2431         dst_point = &p;
2432     }
2433     else if (dst_point->x < 0 || dst_point->y < 0)
2434     {
2435         WARN("Invalid destination point.\n");
2436         return WINED3DERR_INVALIDCALL;
2437     }
2438
2439     if (!src_rect)
2440     {
2441         r.left = 0;
2442         r.top = 0;
2443         r.right = src_surface->resource.width;
2444         r.bottom = src_surface->resource.height;
2445         src_rect = &r;
2446     }
2447     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2448             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2449     {
2450         WARN("Invalid source rectangle.\n");
2451         return WINED3DERR_INVALIDCALL;
2452     }
2453
2454     src_w = src_surface->resource.width;
2455     src_h = src_surface->resource.height;
2456
2457     dst_w = dst_surface->resource.width;
2458     dst_h = dst_surface->resource.height;
2459
2460     update_w = src_rect->right - src_rect->left;
2461     update_h = src_rect->bottom - src_rect->top;
2462
2463     if (update_w > dst_w || dst_point->x > dst_w - update_w
2464             || update_h > dst_h || dst_point->y > dst_h - update_h)
2465     {
2466         WARN("Destination out of bounds.\n");
2467         return WINED3DERR_INVALIDCALL;
2468     }
2469
2470     /* NPOT block sizes would be silly. */
2471     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2472             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2473             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2474     {
2475         WARN("Update rect not block-aligned.\n");
2476         return WINED3DERR_INVALIDCALL;
2477     }
2478
2479     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2480     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2481     if (convert != NO_CONVERSION || format.convert)
2482     {
2483         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2484         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3DTEXF_POINT);
2485     }
2486
2487     context = context_acquire(dst_surface->resource.device, NULL);
2488     gl_info = context->gl_info;
2489
2490     /* Only load the surface for partial updates. For newly allocated texture
2491      * the texture wouldn't be the current location, and we'd upload zeroes
2492      * just to overwrite them again. */
2493     if (update_w == dst_w && update_h == dst_h)
2494         surface_prepare_texture(dst_surface, context, FALSE);
2495     else
2496         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2497     surface_bind(dst_surface, context, FALSE);
2498
2499     data.buffer_object = src_surface->pbo;
2500     data.addr = src_surface->resource.allocatedMemory;
2501     src_pitch = wined3d_surface_get_pitch(src_surface);
2502
2503     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2504
2505     invalidate_active_texture(dst_surface->resource.device, context);
2506
2507     context_release(context);
2508
2509     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2510     return WINED3D_OK;
2511 }
2512
2513 /* This call just allocates the texture, the caller is responsible for binding
2514  * the correct texture. */
2515 /* Context activation is done by the caller. */
2516 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2517         const struct wined3d_format *format, BOOL srgb)
2518 {
2519     BOOL enable_client_storage = FALSE;
2520     GLsizei width = surface->pow2Width;
2521     GLsizei height = surface->pow2Height;
2522     const BYTE *mem = NULL;
2523     GLenum internal;
2524
2525     if (srgb)
2526     {
2527         internal = format->glGammaInternal;
2528     }
2529     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2530     {
2531         internal = format->rtInternal;
2532     }
2533     else
2534     {
2535         internal = format->glInternal;
2536     }
2537
2538     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2539
2540     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2541             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2542             internal, width, height, format->glFormat, format->glType);
2543
2544     ENTER_GL();
2545
2546     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2547     {
2548         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2549                 || !surface->resource.allocatedMemory)
2550         {
2551             /* In some cases we want to disable client storage.
2552              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2553              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2554              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2555              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2556              */
2557             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2558             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2559             surface->flags &= ~SFLAG_CLIENT;
2560             enable_client_storage = TRUE;
2561         }
2562         else
2563         {
2564             surface->flags |= SFLAG_CLIENT;
2565
2566             /* Point OpenGL to our allocated texture memory. Do not use
2567              * resource.allocatedMemory here because it might point into a
2568              * PBO. Instead use heapMemory, but get the alignment right. */
2569             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2570                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2571         }
2572     }
2573
2574     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2575     {
2576         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2577                 internal, width, height, 0, surface->resource.size, mem));
2578         checkGLcall("glCompressedTexImage2DARB");
2579     }
2580     else
2581     {
2582         glTexImage2D(surface->texture_target, surface->texture_level,
2583                 internal, width, height, 0, format->glFormat, format->glType, mem);
2584         checkGLcall("glTexImage2D");
2585     }
2586
2587     if(enable_client_storage) {
2588         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2589         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2590     }
2591     LEAVE_GL();
2592 }
2593
2594 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2595  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2596 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2597 /* GL locking is done by the caller */
2598 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2599 {
2600     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2601     struct wined3d_renderbuffer_entry *entry;
2602     GLuint renderbuffer = 0;
2603     unsigned int src_width, src_height;
2604     unsigned int width, height;
2605
2606     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2607     {
2608         width = rt->pow2Width;
2609         height = rt->pow2Height;
2610     }
2611     else
2612     {
2613         width = surface->pow2Width;
2614         height = surface->pow2Height;
2615     }
2616
2617     src_width = surface->pow2Width;
2618     src_height = surface->pow2Height;
2619
2620     /* A depth stencil smaller than the render target is not valid */
2621     if (width > src_width || height > src_height) return;
2622
2623     /* Remove any renderbuffer set if the sizes match */
2624     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2625             || (width == src_width && height == src_height))
2626     {
2627         surface->current_renderbuffer = NULL;
2628         return;
2629     }
2630
2631     /* Look if we've already got a renderbuffer of the correct dimensions */
2632     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2633     {
2634         if (entry->width == width && entry->height == height)
2635         {
2636             renderbuffer = entry->id;
2637             surface->current_renderbuffer = entry;
2638             break;
2639         }
2640     }
2641
2642     if (!renderbuffer)
2643     {
2644         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2645         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2646         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2647                 surface->resource.format->glInternal, width, height);
2648
2649         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2650         entry->width = width;
2651         entry->height = height;
2652         entry->id = renderbuffer;
2653         list_add_head(&surface->renderbuffers, &entry->entry);
2654
2655         surface->current_renderbuffer = entry;
2656     }
2657
2658     checkGLcall("set_compatible_renderbuffer");
2659 }
2660
2661 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2662 {
2663     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2664
2665     TRACE("surface %p.\n", surface);
2666
2667     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2668     {
2669         ERR("Surface %p is not on a swapchain.\n", surface);
2670         return GL_NONE;
2671     }
2672
2673     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2674     {
2675         if (swapchain->render_to_fbo)
2676         {
2677             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2678             return GL_COLOR_ATTACHMENT0;
2679         }
2680         TRACE("Returning GL_BACK\n");
2681         return GL_BACK;
2682     }
2683     else if (surface == swapchain->front_buffer)
2684     {
2685         TRACE("Returning GL_FRONT\n");
2686         return GL_FRONT;
2687     }
2688
2689     FIXME("Higher back buffer, returning GL_BACK\n");
2690     return GL_BACK;
2691 }
2692
2693 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2694 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2695 {
2696     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2697
2698     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2699         /* No partial locking for textures yet. */
2700         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2701
2702     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2703     if (dirty_rect)
2704     {
2705         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2706         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2707         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2708         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2709     }
2710     else
2711     {
2712         surface->dirtyRect.left = 0;
2713         surface->dirtyRect.top = 0;
2714         surface->dirtyRect.right = surface->resource.width;
2715         surface->dirtyRect.bottom = surface->resource.height;
2716     }
2717
2718     /* if the container is a texture then mark it dirty. */
2719     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2720     {
2721         TRACE("Passing to container.\n");
2722         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2723     }
2724 }
2725
2726 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2727 {
2728     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2729     BOOL ck_changed;
2730
2731     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2732
2733     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2734     {
2735         ERR("Not supported on scratch surfaces.\n");
2736         return WINED3DERR_INVALIDCALL;
2737     }
2738
2739     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2740
2741     /* Reload if either the texture and sysmem have different ideas about the
2742      * color key, or the actual key values changed. */
2743     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2744             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2745             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2746     {
2747         TRACE("Reloading because of color keying\n");
2748         /* To perform the color key conversion we need a sysmem copy of
2749          * the surface. Make sure we have it. */
2750
2751         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2752         /* Make sure the texture is reloaded because of the color key change,
2753          * this kills performance though :( */
2754         /* TODO: This is not necessarily needed with hw palettized texture support. */
2755         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2756         /* Switching color keying on / off may change the internal format. */
2757         if (ck_changed)
2758             surface_force_reload(surface);
2759     }
2760     else if (!(surface->flags & flag))
2761     {
2762         TRACE("Reloading because surface is dirty.\n");
2763     }
2764     else
2765     {
2766         TRACE("surface is already in texture\n");
2767         return WINED3D_OK;
2768     }
2769
2770     /* No partial locking for textures yet. */
2771     surface_load_location(surface, flag, NULL);
2772     surface_evict_sysmem(surface);
2773
2774     return WINED3D_OK;
2775 }
2776
/* See also float_16_to_32() in wined3d_private.h */
/* Converts a 32-bit float to the bit pattern of a 16-bit half float
 * (1 sign bit, 5 exponent bits with a bias of 15, 10 mantissa bits).
 *
 * Zero yields 0x0000, NaN yields 0x7c01 and infinities yield +/-INF.
 * Finite values are rounded to 11 significant bits (nearest, ties away from
 * zero). Results too large for a half become +/-INF; results too small for
 * a normalized half become denormals (the shifted out bits are dropped), or
 * zero. */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), so that its integer part is the
     * 11 bit mantissa including the implicit leading 1. At most one of the
     * two loops does any work. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding may carry out of the 11 bit mantissa. Renormalize, otherwise
     * masking the mantissa below silently drops the carry and the result is
     * off by a factor of two (e.g. 2047.5 would become 1024.0). */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2839
2840 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2841 {
2842     ULONG refcount;
2843
2844     TRACE("Surface %p, container %p of type %#x.\n",
2845             surface, surface->container.u.base, surface->container.type);
2846
2847     switch (surface->container.type)
2848     {
2849         case WINED3D_CONTAINER_TEXTURE:
2850             return wined3d_texture_incref(surface->container.u.texture);
2851
2852         case WINED3D_CONTAINER_SWAPCHAIN:
2853             return wined3d_swapchain_incref(surface->container.u.swapchain);
2854
2855         default:
2856             ERR("Unhandled container type %#x.\n", surface->container.type);
2857         case WINED3D_CONTAINER_NONE:
2858             break;
2859     }
2860
2861     refcount = InterlockedIncrement(&surface->resource.ref);
2862     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2863
2864     return refcount;
2865 }
2866
2867 /* Do not call while under the GL lock. */
2868 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2869 {
2870     ULONG refcount;
2871
2872     TRACE("Surface %p, container %p of type %#x.\n",
2873             surface, surface->container.u.base, surface->container.type);
2874
2875     switch (surface->container.type)
2876     {
2877         case WINED3D_CONTAINER_TEXTURE:
2878             return wined3d_texture_decref(surface->container.u.texture);
2879
2880         case WINED3D_CONTAINER_SWAPCHAIN:
2881             return wined3d_swapchain_decref(surface->container.u.swapchain);
2882
2883         default:
2884             ERR("Unhandled container type %#x.\n", surface->container.type);
2885         case WINED3D_CONTAINER_NONE:
2886             break;
2887     }
2888
2889     refcount = InterlockedDecrement(&surface->resource.ref);
2890     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2891
2892     if (!refcount)
2893     {
2894         surface_cleanup(surface);
2895         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2896
2897         TRACE("Destroyed surface %p.\n", surface);
2898         HeapFree(GetProcessHeap(), 0, surface);
2899     }
2900
2901     return refcount;
2902 }
2903
2904 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2905 {
2906     return resource_set_priority(&surface->resource, priority);
2907 }
2908
2909 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2910 {
2911     return resource_get_priority(&surface->resource);
2912 }
2913
2914 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2915 {
2916     TRACE("surface %p.\n", surface);
2917
2918     if (!surface->resource.device->d3d_initialized)
2919     {
2920         ERR("D3D not initialized.\n");
2921         return;
2922     }
2923
2924     surface_internal_preload(surface, SRGB_ANY);
2925 }
2926
2927 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2928 {
2929     TRACE("surface %p.\n", surface);
2930
2931     return surface->resource.parent;
2932 }
2933
2934 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2935 {
2936     TRACE("surface %p.\n", surface);
2937
2938     return &surface->resource;
2939 }
2940
2941 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2942 {
2943     TRACE("surface %p, flags %#x.\n", surface, flags);
2944
2945     switch (flags)
2946     {
2947         case WINEDDGBS_CANBLT:
2948         case WINEDDGBS_ISBLTDONE:
2949             return WINED3D_OK;
2950
2951         default:
2952             return WINED3DERR_INVALIDCALL;
2953     }
2954 }
2955
2956 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2957 {
2958     TRACE("surface %p, flags %#x.\n", surface, flags);
2959
2960     /* XXX: DDERR_INVALIDSURFACETYPE */
2961
2962     switch (flags)
2963     {
2964         case WINEDDGFS_CANFLIP:
2965         case WINEDDGFS_ISFLIPDONE:
2966             return WINED3D_OK;
2967
2968         default:
2969             return WINED3DERR_INVALIDCALL;
2970     }
2971 }
2972
2973 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2974 {
2975     TRACE("surface %p.\n", surface);
2976
2977     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2978     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2979 }
2980
2981 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2982 {
2983     TRACE("surface %p.\n", surface);
2984
2985     surface->flags &= ~SFLAG_LOST;
2986     return WINED3D_OK;
2987 }
2988
2989 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2990 {
2991     TRACE("surface %p, palette %p.\n", surface, palette);
2992
2993     if (surface->palette == palette)
2994     {
2995         TRACE("Nop palette change.\n");
2996         return WINED3D_OK;
2997     }
2998
2999     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3000         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3001
3002     surface->palette = palette;
3003
3004     if (palette)
3005     {
3006         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3007             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3008
3009         surface->surface_ops->surface_realize_palette(surface);
3010     }
3011
3012     return WINED3D_OK;
3013 }
3014
3015 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3016         DWORD flags, const struct wined3d_color_key *color_key)
3017 {
3018     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3019
3020     if (flags & WINEDDCKEY_COLORSPACE)
3021     {
3022         FIXME(" colorkey value not supported (%08x) !\n", flags);
3023         return WINED3DERR_INVALIDCALL;
3024     }
3025
3026     /* Dirtify the surface, but only if a key was changed. */
3027     if (color_key)
3028     {
3029         switch (flags & ~WINEDDCKEY_COLORSPACE)
3030         {
3031             case WINEDDCKEY_DESTBLT:
3032                 surface->dst_blt_color_key = *color_key;
3033                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3034                 break;
3035
3036             case WINEDDCKEY_DESTOVERLAY:
3037                 surface->dst_overlay_color_key = *color_key;
3038                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3039                 break;
3040
3041             case WINEDDCKEY_SRCOVERLAY:
3042                 surface->src_overlay_color_key = *color_key;
3043                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3044                 break;
3045
3046             case WINEDDCKEY_SRCBLT:
3047                 surface->src_blt_color_key = *color_key;
3048                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3049                 break;
3050         }
3051     }
3052     else
3053     {
3054         switch (flags & ~WINEDDCKEY_COLORSPACE)
3055         {
3056             case WINEDDCKEY_DESTBLT:
3057                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3058                 break;
3059
3060             case WINEDDCKEY_DESTOVERLAY:
3061                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3062                 break;
3063
3064             case WINEDDCKEY_SRCOVERLAY:
3065                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3066                 break;
3067
3068             case WINEDDCKEY_SRCBLT:
3069                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3070                 break;
3071         }
3072     }
3073
3074     return WINED3D_OK;
3075 }
3076
3077 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3078 {
3079     TRACE("surface %p.\n", surface);
3080
3081     return surface->palette;
3082 }
3083
3084 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3085 {
3086     const struct wined3d_format *format = surface->resource.format;
3087     DWORD pitch;
3088
3089     TRACE("surface %p.\n", surface);
3090
3091     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3092     {
3093         /* Since compressed formats are block based, pitch means the amount of
3094          * bytes to the next row of block rather than the next row of pixels. */
3095         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3096         pitch = row_block_count * format->block_byte_count;
3097     }
3098     else
3099     {
3100         unsigned char alignment = surface->resource.device->surface_alignment;
3101         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3102         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3103     }
3104
3105     TRACE("Returning %u.\n", pitch);
3106
3107     return pitch;
3108 }
3109
3110 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3111 {
3112     TRACE("surface %p, mem %p.\n", surface, mem);
3113
3114     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3115     {
3116         WARN("Surface is locked or the DC is in use.\n");
3117         return WINED3DERR_INVALIDCALL;
3118     }
3119
3120     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3121     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3122     {
3123         ERR("Not supported on render targets.\n");
3124         return WINED3DERR_INVALIDCALL;
3125     }
3126
3127     if (mem && mem != surface->resource.allocatedMemory)
3128     {
3129         void *release = NULL;
3130
3131         /* Do I have to copy the old surface content? */
3132         if (surface->flags & SFLAG_DIBSECTION)
3133         {
3134             DeleteDC(surface->hDC);
3135             DeleteObject(surface->dib.DIBsection);
3136             surface->dib.bitmap_data = NULL;
3137             surface->resource.allocatedMemory = NULL;
3138             surface->hDC = NULL;
3139             surface->flags &= ~SFLAG_DIBSECTION;
3140         }
3141         else if (!(surface->flags & SFLAG_USERPTR))
3142         {
3143             release = surface->resource.heapMemory;
3144             surface->resource.heapMemory = NULL;
3145         }
3146         surface->resource.allocatedMemory = mem;
3147         surface->flags |= SFLAG_USERPTR;
3148
3149         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3150         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3151
3152         /* For client textures OpenGL has to be notified. */
3153         if (surface->flags & SFLAG_CLIENT)
3154             surface_release_client_storage(surface);
3155
3156         /* Now free the old memory if any. */
3157         HeapFree(GetProcessHeap(), 0, release);
3158     }
3159     else if (surface->flags & SFLAG_USERPTR)
3160     {
3161         /* HeapMemory should be NULL already. */
3162         if (surface->resource.heapMemory)
3163             ERR("User pointer surface has heap memory allocated.\n");
3164
3165         if (!mem)
3166         {
3167             surface->resource.allocatedMemory = NULL;
3168             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3169
3170             if (surface->flags & SFLAG_CLIENT)
3171                 surface_release_client_storage(surface);
3172
3173             surface_prepare_system_memory(surface);
3174         }
3175
3176         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3177     }
3178
3179     return WINED3D_OK;
3180 }
3181
3182 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3183 {
3184     LONG w, h;
3185
3186     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3187
3188     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3189     {
3190         WARN("Not an overlay surface.\n");
3191         return WINEDDERR_NOTAOVERLAYSURFACE;
3192     }
3193
3194     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3195     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3196     surface->overlay_destrect.left = x;
3197     surface->overlay_destrect.top = y;
3198     surface->overlay_destrect.right = x + w;
3199     surface->overlay_destrect.bottom = y + h;
3200
3201     surface_draw_overlay(surface);
3202
3203     return WINED3D_OK;
3204 }
3205
3206 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3207 {
3208     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3209
3210     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3211     {
3212         TRACE("Not an overlay surface.\n");
3213         return WINEDDERR_NOTAOVERLAYSURFACE;
3214     }
3215
3216     if (!surface->overlay_dest)
3217     {
3218         TRACE("Overlay not visible.\n");
3219         *x = 0;
3220         *y = 0;
3221         return WINEDDERR_OVERLAYNOTVISIBLE;
3222     }
3223
3224     *x = surface->overlay_destrect.left;
3225     *y = surface->overlay_destrect.top;
3226
3227     TRACE("Returning position %d, %d.\n", *x, *y);
3228
3229     return WINED3D_OK;
3230 }
3231
3232 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3233         DWORD flags, struct wined3d_surface *ref)
3234 {
3235     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3236
3237     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3238     {
3239         TRACE("Not an overlay surface.\n");
3240         return WINEDDERR_NOTAOVERLAYSURFACE;
3241     }
3242
3243     return WINED3D_OK;
3244 }
3245
3246 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3247         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3248 {
3249     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3250             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3251
3252     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3253     {
3254         WARN("Not an overlay surface.\n");
3255         return WINEDDERR_NOTAOVERLAYSURFACE;
3256     }
3257     else if (!dst_surface)
3258     {
3259         WARN("Dest surface is NULL.\n");
3260         return WINED3DERR_INVALIDCALL;
3261     }
3262
3263     if (src_rect)
3264     {
3265         surface->overlay_srcrect = *src_rect;
3266     }
3267     else
3268     {
3269         surface->overlay_srcrect.left = 0;
3270         surface->overlay_srcrect.top = 0;
3271         surface->overlay_srcrect.right = surface->resource.width;
3272         surface->overlay_srcrect.bottom = surface->resource.height;
3273     }
3274
3275     if (dst_rect)
3276     {
3277         surface->overlay_destrect = *dst_rect;
3278     }
3279     else
3280     {
3281         surface->overlay_destrect.left = 0;
3282         surface->overlay_destrect.top = 0;
3283         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3284         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3285     }
3286
3287     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3288     {
3289         surface->overlay_dest = NULL;
3290         list_remove(&surface->overlay_entry);
3291     }
3292
3293     if (flags & WINEDDOVER_SHOW)
3294     {
3295         if (surface->overlay_dest != dst_surface)
3296         {
3297             surface->overlay_dest = dst_surface;
3298             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3299         }
3300     }
3301     else if (flags & WINEDDOVER_HIDE)
3302     {
3303         /* tests show that the rectangles are erased on hide */
3304         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3305         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3306         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3307         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3308         surface->overlay_dest = NULL;
3309     }
3310
3311     surface_draw_overlay(surface);
3312
3313     return WINED3D_OK;
3314 }
3315
3316 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3317 {
3318     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3319
3320     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3321
3322     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3323     {
3324         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3325         return WINED3DERR_INVALIDCALL;
3326     }
3327
3328     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3329             surface->pow2Width, surface->pow2Height);
3330     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3331     surface->resource.format = format;
3332
3333     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3334     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3335             format->glFormat, format->glInternal, format->glType);
3336
3337     return WINED3D_OK;
3338 }
3339
3340 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3341         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3342 {
3343     unsigned short *dst_s;
3344     const float *src_f;
3345     unsigned int x, y;
3346
3347     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3348
3349     for (y = 0; y < h; ++y)
3350     {
3351         src_f = (const float *)(src + y * pitch_in);
3352         dst_s = (unsigned short *) (dst + y * pitch_out);
3353         for (x = 0; x < w; ++x)
3354         {
3355             dst_s[x] = float_32_to_16(src_f + x);
3356         }
3357     }
3358 }
3359
3360 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3361         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3362 {
3363     static const unsigned char convert_5to8[] =
3364     {
3365         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3366         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3367         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3368         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3369     };
3370     static const unsigned char convert_6to8[] =
3371     {
3372         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3373         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3374         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3375         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3376         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3377         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3378         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3379         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3380     };
3381     unsigned int x, y;
3382
3383     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3384
3385     for (y = 0; y < h; ++y)
3386     {
3387         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3388         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3389         for (x = 0; x < w; ++x)
3390         {
3391             WORD pixel = src_line[x];
3392             dst_line[x] = 0xff000000
3393                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3394                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3395                     | convert_5to8[(pixel & 0x001f)];
3396         }
3397     }
3398 }
3399
3400 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3401  * in both cases we're just setting the X / Alpha channel to 0xff. */
3402 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3403         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3404 {
3405     unsigned int x, y;
3406
3407     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3408
3409     for (y = 0; y < h; ++y)
3410     {
3411         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3412         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3413
3414         for (x = 0; x < w; ++x)
3415         {
3416             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3417         }
3418     }
3419 }
3420
3421 static inline BYTE cliptobyte(int x)
3422 {
3423     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3424 }
3425
3426 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3427         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3428 {
3429     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3430     unsigned int x, y;
3431
3432     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3433
3434     for (y = 0; y < h; ++y)
3435     {
3436         const BYTE *src_line = src + y * pitch_in;
3437         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3438         for (x = 0; x < w; ++x)
3439         {
3440             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3441              *     C = Y - 16; D = U - 128; E = V - 128;
3442              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3443              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3444              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3445              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3446              * U and V are shared between the pixels. */
3447             if (!(x & 1)) /* For every even pixel, read new U and V. */
3448             {
3449                 d = (int) src_line[1] - 128;
3450                 e = (int) src_line[3] - 128;
3451                 r2 = 409 * e + 128;
3452                 g2 = - 100 * d - 208 * e + 128;
3453                 b2 = 516 * d + 128;
3454             }
3455             c2 = 298 * ((int) src_line[0] - 16);
3456             dst_line[x] = 0xff000000
3457                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3458                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3459                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3460                 /* Scale RGB values to 0..255 range,
3461                  * then clip them if still not in range (may be negative),
3462                  * then shift them within DWORD if necessary. */
3463             src_line += 2;
3464         }
3465     }
3466 }
3467
3468 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3469         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3470 {
3471     unsigned int x, y;
3472     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3473
3474     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3475
3476     for (y = 0; y < h; ++y)
3477     {
3478         const BYTE *src_line = src + y * pitch_in;
3479         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3480         for (x = 0; x < w; ++x)
3481         {
3482             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3483              *     C = Y - 16; D = U - 128; E = V - 128;
3484              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3485              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3486              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3487              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3488              * U and V are shared between the pixels. */
3489             if (!(x & 1)) /* For every even pixel, read new U and V. */
3490             {
3491                 d = (int) src_line[1] - 128;
3492                 e = (int) src_line[3] - 128;
3493                 r2 = 409 * e + 128;
3494                 g2 = - 100 * d - 208 * e + 128;
3495                 b2 = 516 * d + 128;
3496             }
3497             c2 = 298 * ((int) src_line[0] - 16);
3498             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3499                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3500                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3501                 /* Scale RGB values to 0..255 range,
3502                  * then clip them if still not in range (may be negative),
3503                  * then shift them within DWORD if necessary. */
3504             src_line += 2;
3505         }
3506     }
3507 }
3508
3509 struct d3dfmt_convertor_desc
3510 {
3511     enum wined3d_format_id from, to;
3512     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3513 };
3514
3515 static const struct d3dfmt_convertor_desc convertors[] =
3516 {
3517     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3518     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3519     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3520     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3521     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3522     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3523 };
3524
3525 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3526         enum wined3d_format_id to)
3527 {
3528     unsigned int i;
3529
3530     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3531     {
3532         if (convertors[i].from == from && convertors[i].to == to)
3533             return &convertors[i];
3534     }
3535
3536     return NULL;
3537 }
3538
3539 /*****************************************************************************
3540  * surface_convert_format
3541  *
3542  * Creates a duplicate of a surface in a different format. Is used by Blt to
3543  * blit between surfaces with different formats.
3544  *
3545  * Parameters
3546  *  source: Source surface
3547  *  fmt: Requested destination format
3548  *
3549  *****************************************************************************/
3550 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3551 {
3552     struct wined3d_mapped_rect src_map, dst_map;
3553     const struct d3dfmt_convertor_desc *conv;
3554     struct wined3d_surface *ret = NULL;
3555     HRESULT hr;
3556
3557     conv = find_convertor(source->resource.format->id, to_fmt);
3558     if (!conv)
3559     {
3560         FIXME("Cannot find a conversion function from format %s to %s.\n",
3561                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3562         return NULL;
3563     }
3564
3565     wined3d_surface_create(source->resource.device, source->resource.width,
3566             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3DPOOL_SCRATCH,
3567             WINED3D_MULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3568             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3569             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3570     if (!ret)
3571     {
3572         ERR("Failed to create a destination surface for conversion.\n");
3573         return NULL;
3574     }
3575
3576     memset(&src_map, 0, sizeof(src_map));
3577     memset(&dst_map, 0, sizeof(dst_map));
3578
3579     hr = wined3d_surface_map(source, &src_map, NULL, WINED3DLOCK_READONLY);
3580     if (FAILED(hr))
3581     {
3582         ERR("Failed to lock the source surface.\n");
3583         wined3d_surface_decref(ret);
3584         return NULL;
3585     }
3586     hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3DLOCK_READONLY);
3587     if (FAILED(hr))
3588     {
3589         ERR("Failed to lock the destination surface.\n");
3590         wined3d_surface_unmap(source);
3591         wined3d_surface_decref(ret);
3592         return NULL;
3593     }
3594
3595     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3596             source->resource.width, source->resource.height);
3597
3598     wined3d_surface_unmap(ret);
3599     wined3d_surface_unmap(source);
3600
3601     return ret;
3602 }
3603
3604 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3605         unsigned int bpp, UINT pitch, DWORD color)
3606 {
3607     BYTE *first;
3608     int x, y;
3609
3610     /* Do first row */
3611
3612 #define COLORFILL_ROW(type) \
3613 do { \
3614     type *d = (type *)buf; \
3615     for (x = 0; x < width; ++x) \
3616         d[x] = (type)color; \
3617 } while(0)
3618
3619     switch (bpp)
3620     {
3621         case 1:
3622             COLORFILL_ROW(BYTE);
3623             break;
3624
3625         case 2:
3626             COLORFILL_ROW(WORD);
3627             break;
3628
3629         case 3:
3630         {
3631             BYTE *d = buf;
3632             for (x = 0; x < width; ++x, d += 3)
3633             {
3634                 d[0] = (color      ) & 0xFF;
3635                 d[1] = (color >>  8) & 0xFF;
3636                 d[2] = (color >> 16) & 0xFF;
3637             }
3638             break;
3639         }
3640         case 4:
3641             COLORFILL_ROW(DWORD);
3642             break;
3643
3644         default:
3645             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3646             return WINED3DERR_NOTAVAILABLE;
3647     }
3648
3649 #undef COLORFILL_ROW
3650
3651     /* Now copy first row. */
3652     first = buf;
3653     for (y = 1; y < height; ++y)
3654     {
3655         buf += pitch;
3656         memcpy(buf, first, width * bpp);
3657     }
3658
3659     return WINED3D_OK;
3660 }
3661
3662 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3663 {
3664     TRACE("surface %p.\n", surface);
3665
3666     if (!(surface->flags & SFLAG_LOCKED))
3667     {
3668         WARN("Trying to unmap unmapped surface.\n");
3669         return WINEDDERR_NOTLOCKED;
3670     }
3671     surface->flags &= ~SFLAG_LOCKED;
3672
3673     surface->surface_ops->surface_unmap(surface);
3674
3675     return WINED3D_OK;
3676 }
3677
3678 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3679         struct wined3d_mapped_rect *mapped_rect, const RECT *rect, DWORD flags)
3680 {
3681     const struct wined3d_format *format = surface->resource.format;
3682
3683     TRACE("surface %p, mapped_rect %p, rect %s, flags %#x.\n",
3684             surface, mapped_rect, wine_dbgstr_rect(rect), flags);
3685
3686     if (surface->flags & SFLAG_LOCKED)
3687     {
3688         WARN("Surface is already mapped.\n");
3689         return WINED3DERR_INVALIDCALL;
3690     }
3691     if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
3692             && rect && (rect->left || rect->top
3693             || rect->right != surface->resource.width
3694             || rect->bottom != surface->resource.height))
3695     {
3696         UINT width_mask = format->block_width - 1;
3697         UINT height_mask = format->block_height - 1;
3698
3699         if ((rect->left & width_mask) || (rect->right & width_mask)
3700                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3701         {
3702             WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3703                     wine_dbgstr_rect(rect), format->block_width, format->block_height);
3704
3705             if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3706                 return WINED3DERR_INVALIDCALL;
3707         }
3708     }
3709
3710     surface->flags |= SFLAG_LOCKED;
3711
3712     if (!(surface->flags & SFLAG_LOCKABLE))
3713         WARN("Trying to lock unlockable surface.\n");
3714
3715     /* Performance optimization: Count how often a surface is mapped, if it is
3716      * mapped regularly do not throw away the system memory copy. This avoids
3717      * the need to download the surface from OpenGL all the time. The surface
3718      * is still downloaded if the OpenGL texture is changed. */
3719     if (!(surface->flags & SFLAG_DYNLOCK))
3720     {
3721         if (++surface->lockCount > MAXLOCKCOUNT)
3722         {
3723             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3724             surface->flags |= SFLAG_DYNLOCK;
3725         }
3726     }
3727
3728     surface->surface_ops->surface_map(surface, rect, flags);
3729
3730     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3731         mapped_rect->row_pitch = surface->resource.width * format->byte_count;
3732     else
3733         mapped_rect->row_pitch = wined3d_surface_get_pitch(surface);
3734
3735     if (!rect)
3736     {
3737         mapped_rect->data = surface->resource.allocatedMemory;
3738         surface->lockedRect.left = 0;
3739         surface->lockedRect.top = 0;
3740         surface->lockedRect.right = surface->resource.width;
3741         surface->lockedRect.bottom = surface->resource.height;
3742     }
3743     else
3744     {
3745         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3746         {
3747             /* Compressed textures are block based, so calculate the offset of
3748              * the block that contains the top-left pixel of the locked rectangle. */
3749             mapped_rect->data = surface->resource.allocatedMemory
3750                     + ((rect->top / format->block_height) * mapped_rect->row_pitch)
3751                     + ((rect->left / format->block_width) * format->block_byte_count);
3752         }
3753         else
3754         {
3755             mapped_rect->data = surface->resource.allocatedMemory
3756                     + (mapped_rect->row_pitch * rect->top)
3757                     + (rect->left * format->byte_count);
3758         }
3759         surface->lockedRect.left = rect->left;
3760         surface->lockedRect.top = rect->top;
3761         surface->lockedRect.right = rect->right;
3762         surface->lockedRect.bottom = rect->bottom;
3763     }
3764
3765     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3766     TRACE("Returning memory %p, pitch %u.\n", mapped_rect->data, mapped_rect->row_pitch);
3767
3768     return WINED3D_OK;
3769 }
3770
3771 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3772 {
3773     struct wined3d_mapped_rect map;
3774     HRESULT hr;
3775
3776     TRACE("surface %p, dc %p.\n", surface, dc);
3777
3778     if (surface->flags & SFLAG_USERPTR)
3779     {
3780         ERR("Not supported on surfaces with application-provided memory.\n");
3781         return WINEDDERR_NODC;
3782     }
3783
3784     /* Give more detailed info for ddraw. */
3785     if (surface->flags & SFLAG_DCINUSE)
3786         return WINEDDERR_DCALREADYCREATED;
3787
3788     /* Can't GetDC if the surface is locked. */
3789     if (surface->flags & SFLAG_LOCKED)
3790         return WINED3DERR_INVALIDCALL;
3791
3792     /* Create a DIB section if there isn't a dc yet. */
3793     if (!surface->hDC)
3794     {
3795         if (surface->flags & SFLAG_CLIENT)
3796         {
3797             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3798             surface_release_client_storage(surface);
3799         }
3800         hr = surface_create_dib_section(surface);
3801         if (FAILED(hr))
3802             return WINED3DERR_INVALIDCALL;
3803
3804         /* Use the DIB section from now on if we are not using a PBO. */
3805         if (!(surface->flags & SFLAG_PBO))
3806             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3807     }
3808
3809     /* Map the surface. */
3810     hr = wined3d_surface_map(surface, &map, NULL, 0);
3811     if (FAILED(hr))
3812     {
3813         ERR("Map failed, hr %#x.\n", hr);
3814         return hr;
3815     }
3816
3817     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3818      * activates the allocatedMemory. */
3819     if (surface->flags & SFLAG_PBO)
3820         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3821
3822     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3823             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3824     {
3825         /* GetDC on palettized formats is unsupported in D3D9, and the method
3826          * is missing in D3D8, so this should only be used for DX <=7
3827          * surfaces (with non-device palettes). */
3828         const PALETTEENTRY *pal = NULL;
3829
3830         if (surface->palette)
3831         {
3832             pal = surface->palette->palents;
3833         }
3834         else
3835         {
3836             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3837             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3838
3839             if (dds_primary && dds_primary->palette)
3840                 pal = dds_primary->palette->palents;
3841         }
3842
3843         if (pal)
3844         {
3845             RGBQUAD col[256];
3846             unsigned int i;
3847
3848             for (i = 0; i < 256; ++i)
3849             {
3850                 col[i].rgbRed = pal[i].peRed;
3851                 col[i].rgbGreen = pal[i].peGreen;
3852                 col[i].rgbBlue = pal[i].peBlue;
3853                 col[i].rgbReserved = 0;
3854             }
3855             SetDIBColorTable(surface->hDC, 0, 256, col);
3856         }
3857     }
3858
3859     surface->flags |= SFLAG_DCINUSE;
3860
3861     *dc = surface->hDC;
3862     TRACE("Returning dc %p.\n", *dc);
3863
3864     return WINED3D_OK;
3865 }
3866
3867 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3868 {
3869     TRACE("surface %p, dc %p.\n", surface, dc);
3870
3871     if (!(surface->flags & SFLAG_DCINUSE))
3872         return WINEDDERR_NODC;
3873
3874     if (surface->hDC != dc)
3875     {
3876         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3877                 dc, surface->hDC);
3878         return WINEDDERR_NODC;
3879     }
3880
3881     /* Copy the contents of the DIB over to the PBO. */
3882     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3883         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3884
3885     /* We locked first, so unlock now. */
3886     wined3d_surface_unmap(surface);
3887
3888     surface->flags &= ~SFLAG_DCINUSE;
3889
3890     return WINED3D_OK;
3891 }
3892
3893 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3894 {
3895     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3896
3897     if (flags)
3898     {
3899         static UINT once;
3900         if (!once++)
3901             FIXME("Ignoring flags %#x.\n", flags);
3902         else
3903             WARN("Ignoring flags %#x.\n", flags);
3904     }
3905
3906     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3907     {
3908         ERR("Not supported on swapchain surfaces.\n");
3909         return WINEDDERR_NOTFLIPPABLE;
3910     }
3911
3912     /* Flipping is only supported on render targets and overlays. */
3913     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3914     {
3915         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3916         return WINEDDERR_NOTFLIPPABLE;
3917     }
3918
3919     flip_surface(surface, override);
3920
3921     /* Update overlays if they're visible. */
3922     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3923         return surface_draw_overlay(surface);
3924
3925     return WINED3D_OK;
3926 }
3927
3928 /* Do not call while under the GL lock. */
3929 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3930 {
3931     struct wined3d_device *device = surface->resource.device;
3932
3933     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3934
3935     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3936     {
3937         struct wined3d_texture *texture = surface->container.u.texture;
3938
3939         TRACE("Passing to container (%p).\n", texture);
3940         texture->texture_ops->texture_preload(texture, srgb);
3941     }
3942     else
3943     {
3944         struct wined3d_context *context;
3945
3946         TRACE("(%p) : About to load surface\n", surface);
3947
3948         /* TODO: Use already acquired context when possible. */
3949         context = context_acquire(device, NULL);
3950
3951         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3952
3953         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3954         {
3955             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3956             GLclampf tmp;
3957             tmp = 0.9f;
3958             ENTER_GL();
3959             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3960             LEAVE_GL();
3961         }
3962
3963         context_release(context);
3964     }
3965 }
3966
3967 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3968 {
3969     if (!surface->resource.allocatedMemory)
3970     {
3971         if (!surface->resource.heapMemory)
3972         {
3973             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3974                     surface->resource.size + RESOURCE_ALIGNMENT)))
3975             {
3976                 ERR("Failed to allocate memory.\n");
3977                 return FALSE;
3978             }
3979         }
3980         else if (!(surface->flags & SFLAG_CLIENT))
3981         {
3982             ERR("Surface %p has heapMemory %p and flags %#x.\n",
3983                     surface, surface->resource.heapMemory, surface->flags);
3984         }
3985
3986         surface->resource.allocatedMemory =
3987             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3988     }
3989     else
3990     {
3991         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3992     }
3993
3994     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3995
3996     return TRUE;
3997 }
3998
3999 /* Read the framebuffer back into the surface */
4000 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4001 {
4002     struct wined3d_device *device = surface->resource.device;
4003     const struct wined3d_gl_info *gl_info;
4004     struct wined3d_context *context;
4005     BYTE *mem;
4006     GLint fmt;
4007     GLint type;
4008     BYTE *row, *top, *bottom;
4009     int i;
4010     BOOL bpp;
4011     RECT local_rect;
4012     BOOL srcIsUpsideDown;
4013     GLint rowLen = 0;
4014     GLint skipPix = 0;
4015     GLint skipRow = 0;
4016
4017     context = context_acquire(device, surface);
4018     context_apply_blit_state(context, device);
4019     gl_info = context->gl_info;
4020
4021     ENTER_GL();
4022
4023     /* Select the correct read buffer, and give some debug output.
4024      * There is no need to keep track of the current read buffer or reset it, every part of the code
4025      * that reads sets the read buffer as desired.
4026      */
4027     if (surface_is_offscreen(surface))
4028     {
4029         /* Mapping the primary render target which is not on a swapchain.
4030          * Read from the back buffer. */
4031         TRACE("Mapping offscreen render target.\n");
4032         glReadBuffer(device->offscreenBuffer);
4033         srcIsUpsideDown = TRUE;
4034     }
4035     else
4036     {
4037         /* Onscreen surfaces are always part of a swapchain */
4038         GLenum buffer = surface_get_gl_buffer(surface);
4039         TRACE("Mapping %#x buffer.\n", buffer);
4040         glReadBuffer(buffer);
4041         checkGLcall("glReadBuffer");
4042         srcIsUpsideDown = FALSE;
4043     }
4044
4045     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4046     if (!rect)
4047     {
4048         local_rect.left = 0;
4049         local_rect.top = 0;
4050         local_rect.right = surface->resource.width;
4051         local_rect.bottom = surface->resource.height;
4052     }
4053     else
4054     {
4055         local_rect = *rect;
4056     }
4057     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4058
4059     switch (surface->resource.format->id)
4060     {
4061         case WINED3DFMT_P8_UINT:
4062         {
4063             if (primary_render_target_is_p8(device))
4064             {
4065                 /* In case of P8 render targets the index is stored in the alpha component */
4066                 fmt = GL_ALPHA;
4067                 type = GL_UNSIGNED_BYTE;
4068                 mem = dest;
4069                 bpp = surface->resource.format->byte_count;
4070             }
4071             else
4072             {
4073                 /* GL can't return palettized data, so read ARGB pixels into a
4074                  * separate block of memory and convert them into palettized format
4075                  * in software. Slow, but if the app means to use palettized render
4076                  * targets and locks it...
4077                  *
4078                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4079                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4080                  * for the color channels when palettizing the colors.
4081                  */
4082                 fmt = GL_RGB;
4083                 type = GL_UNSIGNED_BYTE;
4084                 pitch *= 3;
4085                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4086                 if (!mem)
4087                 {
4088                     ERR("Out of memory\n");
4089                     LEAVE_GL();
4090                     return;
4091                 }
4092                 bpp = surface->resource.format->byte_count * 3;
4093             }
4094         }
4095         break;
4096
4097         default:
4098             mem = dest;
4099             fmt = surface->resource.format->glFormat;
4100             type = surface->resource.format->glType;
4101             bpp = surface->resource.format->byte_count;
4102     }
4103
4104     if (surface->flags & SFLAG_PBO)
4105     {
4106         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4107         checkGLcall("glBindBufferARB");
4108         if (mem)
4109         {
4110             ERR("mem not null for pbo -- unexpected\n");
4111             mem = NULL;
4112         }
4113     }
4114
4115     /* Save old pixel store pack state */
4116     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4117     checkGLcall("glGetIntegerv");
4118     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4119     checkGLcall("glGetIntegerv");
4120     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4121     checkGLcall("glGetIntegerv");
4122
4123     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4124     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4125     checkGLcall("glPixelStorei");
4126     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4127     checkGLcall("glPixelStorei");
4128     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4129     checkGLcall("glPixelStorei");
4130
4131     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4132             local_rect.right - local_rect.left,
4133             local_rect.bottom - local_rect.top,
4134             fmt, type, mem);
4135     checkGLcall("glReadPixels");
4136
4137     /* Reset previous pixel store pack state */
4138     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4139     checkGLcall("glPixelStorei");
4140     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4141     checkGLcall("glPixelStorei");
4142     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4143     checkGLcall("glPixelStorei");
4144
4145     if (surface->flags & SFLAG_PBO)
4146     {
4147         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4148         checkGLcall("glBindBufferARB");
4149
4150         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4151          * to get a pointer to it and perform the flipping in software. This is a lot
4152          * faster than calling glReadPixels for each line. In case we want more speed
4153          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4154         if (!srcIsUpsideDown)
4155         {
4156             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4157             checkGLcall("glBindBufferARB");
4158
4159             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4160             checkGLcall("glMapBufferARB");
4161         }
4162     }
4163
4164     /* TODO: Merge this with the palettization loop below for P8 targets */
4165     if(!srcIsUpsideDown) {
4166         UINT len, off;
4167         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4168             Flip the lines in software */
4169         len = (local_rect.right - local_rect.left) * bpp;
4170         off = local_rect.left * bpp;
4171
4172         row = HeapAlloc(GetProcessHeap(), 0, len);
4173         if(!row) {
4174             ERR("Out of memory\n");
4175             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4176                 HeapFree(GetProcessHeap(), 0, mem);
4177             LEAVE_GL();
4178             return;
4179         }
4180
4181         top = mem + pitch * local_rect.top;
4182         bottom = mem + pitch * (local_rect.bottom - 1);
4183         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4184             memcpy(row, top + off, len);
4185             memcpy(top + off, bottom + off, len);
4186             memcpy(bottom + off, row, len);
4187             top += pitch;
4188             bottom -= pitch;
4189         }
4190         HeapFree(GetProcessHeap(), 0, row);
4191
4192         /* Unmap the temp PBO buffer */
4193         if (surface->flags & SFLAG_PBO)
4194         {
4195             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4196             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4197         }
4198     }
4199
4200     LEAVE_GL();
4201     context_release(context);
4202
4203     /* For P8 textures we need to perform an inverse palette lookup. This is
4204      * done by searching for a palette index which matches the RGB value.
4205      * Note this isn't guaranteed to work when there are multiple entries for
4206      * the same color but we have no choice. In case of P8 render targets,
4207      * the index is stored in the alpha component so no conversion is needed. */
4208     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4209     {
4210         const PALETTEENTRY *pal = NULL;
4211         DWORD width = pitch / 3;
4212         int x, y, c;
4213
4214         if (surface->palette)
4215         {
4216             pal = surface->palette->palents;
4217         }
4218         else
4219         {
4220             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4221             HeapFree(GetProcessHeap(), 0, mem);
4222             return;
4223         }
4224
4225         for(y = local_rect.top; y < local_rect.bottom; y++) {
4226             for(x = local_rect.left; x < local_rect.right; x++) {
4227                 /*                      start              lines            pixels      */
4228                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4229                 const BYTE *green = blue  + 1;
4230                 const BYTE *red = green + 1;
4231
4232                 for(c = 0; c < 256; c++) {
4233                     if(*red   == pal[c].peRed   &&
4234                        *green == pal[c].peGreen &&
4235                        *blue  == pal[c].peBlue)
4236                     {
4237                         *((BYTE *) dest + y * width + x) = c;
4238                         break;
4239                     }
4240                 }
4241             }
4242         }
4243         HeapFree(GetProcessHeap(), 0, mem);
4244     }
4245 }
4246
4247 /* Read the framebuffer contents into a texture. Note that this function
4248  * doesn't do any kind of flipping. Using this on an onscreen surface will
4249  * result in a flipped D3D texture. */
4250 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4251 {
4252     struct wined3d_device *device = surface->resource.device;
4253     struct wined3d_context *context;
4254
4255     context = context_acquire(device, surface);
4256     device_invalidate_state(device, STATE_FRAMEBUFFER);
4257
4258     surface_prepare_texture(surface, context, srgb);
4259     surface_bind_and_dirtify(surface, context, srgb);
4260
4261     TRACE("Reading back offscreen render target %p.\n", surface);
4262
4263     ENTER_GL();
4264
4265     if (surface_is_offscreen(surface))
4266         glReadBuffer(device->offscreenBuffer);
4267     else
4268         glReadBuffer(surface_get_gl_buffer(surface));
4269     checkGLcall("glReadBuffer");
4270
4271     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4272             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4273     checkGLcall("glCopyTexSubImage2D");
4274
4275     LEAVE_GL();
4276
4277     context_release(context);
4278 }
4279
4280 /* Context activation is done by the caller. */
4281 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4282         struct wined3d_context *context, BOOL srgb)
4283 {
4284     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4285     CONVERT_TYPES convert;
4286     struct wined3d_format format;
4287
4288     if (surface->flags & alloc_flag) return;
4289
4290     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4291     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4292     else surface->flags &= ~SFLAG_CONVERTED;
4293
4294     surface_bind_and_dirtify(surface, context, srgb);
4295     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4296     surface->flags |= alloc_flag;
4297 }
4298
4299 /* Context activation is done by the caller. */
4300 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4301 {
4302     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4303     {
4304         struct wined3d_texture *texture = surface->container.u.texture;
4305         UINT sub_count = texture->level_count * texture->layer_count;
4306         UINT i;
4307
4308         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4309
4310         for (i = 0; i < sub_count; ++i)
4311         {
4312             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4313             surface_prepare_texture_internal(s, context, srgb);
4314         }
4315
4316         return;
4317     }
4318
4319     surface_prepare_texture_internal(surface, context, srgb);
4320 }
4321
4322 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4323 {
4324     if (multisample)
4325     {
4326         if (surface->rb_multisample)
4327             return;
4328
4329         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4330         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4331         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4332                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4333         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4334     }
4335     else
4336     {
4337         if (surface->rb_resolved)
4338             return;
4339
4340         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4341         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4342         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4343                 surface->pow2Width, surface->pow2Height);
4344         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4345     }
4346 }
4347
4348 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4349         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4350 {
4351     struct wined3d_device *device = surface->resource.device;
4352     UINT pitch = wined3d_surface_get_pitch(surface);
4353     const struct wined3d_gl_info *gl_info;
4354     struct wined3d_context *context;
4355     RECT local_rect;
4356     UINT w, h;
4357
4358     surface_get_rect(surface, rect, &local_rect);
4359
4360     mem += local_rect.top * pitch + local_rect.left * bpp;
4361     w = local_rect.right - local_rect.left;
4362     h = local_rect.bottom - local_rect.top;
4363
4364     /* Activate the correct context for the render target */
4365     context = context_acquire(device, surface);
4366     context_apply_blit_state(context, device);
4367     gl_info = context->gl_info;
4368
4369     ENTER_GL();
4370
4371     if (!surface_is_offscreen(surface))
4372     {
4373         GLenum buffer = surface_get_gl_buffer(surface);
4374         TRACE("Unlocking %#x buffer.\n", buffer);
4375         context_set_draw_buffer(context, buffer);
4376
4377         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4378         glPixelZoom(1.0f, -1.0f);
4379     }
4380     else
4381     {
4382         /* Primary offscreen render target */
4383         TRACE("Offscreen render target.\n");
4384         context_set_draw_buffer(context, device->offscreenBuffer);
4385
4386         glPixelZoom(1.0f, 1.0f);
4387     }
4388
4389     glRasterPos3i(local_rect.left, local_rect.top, 1);
4390     checkGLcall("glRasterPos3i");
4391
4392     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4393     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4394
4395     if (surface->flags & SFLAG_PBO)
4396     {
4397         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4398         checkGLcall("glBindBufferARB");
4399     }
4400
4401     glDrawPixels(w, h, fmt, type, mem);
4402     checkGLcall("glDrawPixels");
4403
4404     if (surface->flags & SFLAG_PBO)
4405     {
4406         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4407         checkGLcall("glBindBufferARB");
4408     }
4409
4410     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4411     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4412
4413     LEAVE_GL();
4414
4415     if (wined3d_settings.strict_draw_ordering
4416             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4417             && surface->container.u.swapchain->front_buffer == surface))
4418         wglFlush();
4419
4420     context_release(context);
4421 }
4422
4423 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4424         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4425 {
4426     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4427     const struct wined3d_device *device = surface->resource.device;
4428     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4429     BOOL blit_supported = FALSE;
4430
4431     /* Copy the default values from the surface. Below we might perform fixups */
4432     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4433     *format = *surface->resource.format;
4434     *convert = NO_CONVERSION;
4435
4436     /* Ok, now look if we have to do any conversion */
4437     switch (surface->resource.format->id)
4438     {
4439         case WINED3DFMT_P8_UINT:
4440             /* Below the call to blit_supported is disabled for Wine 1.2
4441              * because the function isn't operating correctly yet. At the
4442              * moment 8-bit blits are handled in software and if certain GL
4443              * extensions are around, surface conversion is performed at
4444              * upload time. The blit_supported call recognizes it as a
4445              * destination fixup. This type of upload 'fixup' and 8-bit to
4446              * 8-bit blits need to be handled by the blit_shader.
4447              * TODO: get rid of this #if 0. */
4448 #if 0
4449             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4450                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4451                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4452 #endif
4453             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4454
4455             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4456              * texturing. Further also use conversion in case of color keying.
4457              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4458              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4459              * conflicts with this.
4460              */
4461             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4462                     || colorkey_active || !use_texturing)
4463             {
4464                 format->glFormat = GL_RGBA;
4465                 format->glInternal = GL_RGBA;
4466                 format->glType = GL_UNSIGNED_BYTE;
4467                 format->conv_byte_count = 4;
4468                 if (colorkey_active)
4469                     *convert = CONVERT_PALETTED_CK;
4470                 else
4471                     *convert = CONVERT_PALETTED;
4472             }
4473             break;
4474
4475         case WINED3DFMT_B2G3R3_UNORM:
4476             /* **********************
4477                 GL_UNSIGNED_BYTE_3_3_2
4478                 ********************** */
4479             if (colorkey_active) {
4480                 /* This texture format will never be used.. So do not care about color keying
4481                     up until the point in time it will be needed :-) */
4482                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4483             }
4484             break;
4485
4486         case WINED3DFMT_B5G6R5_UNORM:
4487             if (colorkey_active)
4488             {
4489                 *convert = CONVERT_CK_565;
4490                 format->glFormat = GL_RGBA;
4491                 format->glInternal = GL_RGB5_A1;
4492                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4493                 format->conv_byte_count = 2;
4494             }
4495             break;
4496
4497         case WINED3DFMT_B5G5R5X1_UNORM:
4498             if (colorkey_active)
4499             {
4500                 *convert = CONVERT_CK_5551;
4501                 format->glFormat = GL_BGRA;
4502                 format->glInternal = GL_RGB5_A1;
4503                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4504                 format->conv_byte_count = 2;
4505             }
4506             break;
4507
4508         case WINED3DFMT_B8G8R8_UNORM:
4509             if (colorkey_active)
4510             {
4511                 *convert = CONVERT_CK_RGB24;
4512                 format->glFormat = GL_RGBA;
4513                 format->glInternal = GL_RGBA8;
4514                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4515                 format->conv_byte_count = 4;
4516             }
4517             break;
4518
4519         case WINED3DFMT_B8G8R8X8_UNORM:
4520             if (colorkey_active)
4521             {
4522                 *convert = CONVERT_RGB32_888;
4523                 format->glFormat = GL_RGBA;
4524                 format->glInternal = GL_RGBA8;
4525                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4526                 format->conv_byte_count = 4;
4527             }
4528             break;
4529
4530         default:
4531             break;
4532     }
4533
4534     return WINED3D_OK;
4535 }
4536
4537 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4538 {
4539     /* FIXME: Is this really how color keys are supposed to work? I think it
4540      * makes more sense to compare the individual channels. */
4541     return color >= color_key->color_space_low_value
4542             && color <= color_key->color_space_high_value;
4543 }
4544
4545 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4546 {
4547     const struct wined3d_device *device = surface->resource.device;
4548     const struct wined3d_palette *pal = surface->palette;
4549     BOOL index_in_alpha = FALSE;
4550     unsigned int i;
4551
4552     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4553      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4554      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4555      * duplicate entries. Store the color key in the unused alpha component to speed the
4556      * download up and to make conversion unneeded. */
4557     index_in_alpha = primary_render_target_is_p8(device);
4558
4559     if (!pal)
4560     {
4561         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4562         if (index_in_alpha)
4563         {
4564             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4565              * there's no palette at this time. */
4566             for (i = 0; i < 256; i++) table[i][3] = i;
4567         }
4568     }
4569     else
4570     {
4571         TRACE("Using surface palette %p\n", pal);
4572         /* Get the surface's palette */
4573         for (i = 0; i < 256; ++i)
4574         {
4575             table[i][0] = pal->palents[i].peRed;
4576             table[i][1] = pal->palents[i].peGreen;
4577             table[i][2] = pal->palents[i].peBlue;
4578
4579             /* When index_in_alpha is set the palette index is stored in the
4580              * alpha component. In case of a readback we can then read
4581              * GL_ALPHA. Color keying is handled in BltOverride using a
4582              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4583              * color key itself is passed to glAlphaFunc in other cases the
4584              * alpha component of pixels that should be masked away is set to 0. */
4585             if (index_in_alpha)
4586                 table[i][3] = i;
4587             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4588                 table[i][3] = 0x00;
4589             else if (pal->flags & WINEDDPCAPS_ALPHA)
4590                 table[i][3] = pal->palents[i].peFlags;
4591             else
4592                 table[i][3] = 0xFF;
4593         }
4594     }
4595 }
4596
4597 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4598         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4599 {
4600     const BYTE *source;
4601     BYTE *dest;
4602     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4603
4604     switch (convert) {
4605         case NO_CONVERSION:
4606         {
4607             memcpy(dst, src, pitch * height);
4608             break;
4609         }
4610         case CONVERT_PALETTED:
4611         case CONVERT_PALETTED_CK:
4612         {
4613             BYTE table[256][4];
4614             unsigned int x, y;
4615
4616             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4617
4618             for (y = 0; y < height; y++)
4619             {
4620                 source = src + pitch * y;
4621                 dest = dst + outpitch * y;
4622                 /* This is an 1 bpp format, using the width here is fine */
4623                 for (x = 0; x < width; x++) {
4624                     BYTE color = *source++;
4625                     *dest++ = table[color][0];
4626                     *dest++ = table[color][1];
4627                     *dest++ = table[color][2];
4628                     *dest++ = table[color][3];
4629                 }
4630             }
4631         }
4632         break;
4633
4634         case CONVERT_CK_565:
4635         {
4636             /* Converting the 565 format in 5551 packed to emulate color-keying.
4637
4638               Note : in all these conversion, it would be best to average the averaging
4639                       pixels to get the color of the pixel that will be color-keyed to
4640                       prevent 'color bleeding'. This will be done later on if ever it is
4641                       too visible.
4642
4643               Note2: Nvidia documents say that their driver does not support alpha + color keying
4644                      on the same surface and disables color keying in such a case
4645             */
4646             unsigned int x, y;
4647             const WORD *Source;
4648             WORD *Dest;
4649
4650             TRACE("Color keyed 565\n");
4651
4652             for (y = 0; y < height; y++) {
4653                 Source = (const WORD *)(src + y * pitch);
4654                 Dest = (WORD *) (dst + y * outpitch);
4655                 for (x = 0; x < width; x++ ) {
4656                     WORD color = *Source++;
4657                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4658                     if (!color_in_range(&surface->src_blt_color_key, color))
4659                         *Dest |= 0x0001;
4660                     Dest++;
4661                 }
4662             }
4663         }
4664         break;
4665
4666         case CONVERT_CK_5551:
4667         {
4668             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4669             unsigned int x, y;
4670             const WORD *Source;
4671             WORD *Dest;
4672             TRACE("Color keyed 5551\n");
4673             for (y = 0; y < height; y++) {
4674                 Source = (const WORD *)(src + y * pitch);
4675                 Dest = (WORD *) (dst + y * outpitch);
4676                 for (x = 0; x < width; x++ ) {
4677                     WORD color = *Source++;
4678                     *Dest = color;
4679                     if (!color_in_range(&surface->src_blt_color_key, color))
4680                         *Dest |= (1 << 15);
4681                     else
4682                         *Dest &= ~(1 << 15);
4683                     Dest++;
4684                 }
4685             }
4686         }
4687         break;
4688
4689         case CONVERT_CK_RGB24:
4690         {
4691             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4692             unsigned int x, y;
4693             for (y = 0; y < height; y++)
4694             {
4695                 source = src + pitch * y;
4696                 dest = dst + outpitch * y;
4697                 for (x = 0; x < width; x++) {
4698                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4699                     DWORD dstcolor = color << 8;
4700                     if (!color_in_range(&surface->src_blt_color_key, color))
4701                         dstcolor |= 0xff;
4702                     *(DWORD*)dest = dstcolor;
4703                     source += 3;
4704                     dest += 4;
4705                 }
4706             }
4707         }
4708         break;
4709
4710         case CONVERT_RGB32_888:
4711         {
4712             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4713             unsigned int x, y;
4714             for (y = 0; y < height; y++)
4715             {
4716                 source = src + pitch * y;
4717                 dest = dst + outpitch * y;
4718                 for (x = 0; x < width; x++) {
4719                     DWORD color = 0xffffff & *(const DWORD*)source;
4720                     DWORD dstcolor = color << 8;
4721                     if (!color_in_range(&surface->src_blt_color_key, color))
4722                         dstcolor |= 0xff;
4723                     *(DWORD*)dest = dstcolor;
4724                     source += 4;
4725                     dest += 4;
4726                 }
4727             }
4728         }
4729         break;
4730
4731         default:
4732             ERR("Unsupported conversion type %#x.\n", convert);
4733     }
4734     return WINED3D_OK;
4735 }
4736
4737 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4738 {
4739     /* Flip the surface contents */
4740     /* Flip the DC */
4741     {
4742         HDC tmp;
4743         tmp = front->hDC;
4744         front->hDC = back->hDC;
4745         back->hDC = tmp;
4746     }
4747
4748     /* Flip the DIBsection */
4749     {
4750         HBITMAP tmp = front->dib.DIBsection;
4751         front->dib.DIBsection = back->dib.DIBsection;
4752         back->dib.DIBsection = tmp;
4753     }
4754
4755     /* Flip the surface data */
4756     {
4757         void* tmp;
4758
4759         tmp = front->dib.bitmap_data;
4760         front->dib.bitmap_data = back->dib.bitmap_data;
4761         back->dib.bitmap_data = tmp;
4762
4763         tmp = front->resource.allocatedMemory;
4764         front->resource.allocatedMemory = back->resource.allocatedMemory;
4765         back->resource.allocatedMemory = tmp;
4766
4767         tmp = front->resource.heapMemory;
4768         front->resource.heapMemory = back->resource.heapMemory;
4769         back->resource.heapMemory = tmp;
4770     }
4771
4772     /* Flip the PBO */
4773     {
4774         GLuint tmp_pbo = front->pbo;
4775         front->pbo = back->pbo;
4776         back->pbo = tmp_pbo;
4777     }
4778
4779     /* Flip the opengl texture */
4780     {
4781         GLuint tmp;
4782
4783         tmp = back->texture_name;
4784         back->texture_name = front->texture_name;
4785         front->texture_name = tmp;
4786
4787         tmp = back->texture_name_srgb;
4788         back->texture_name_srgb = front->texture_name_srgb;
4789         front->texture_name_srgb = tmp;
4790
4791         tmp = back->rb_multisample;
4792         back->rb_multisample = front->rb_multisample;
4793         front->rb_multisample = tmp;
4794
4795         tmp = back->rb_resolved;
4796         back->rb_resolved = front->rb_resolved;
4797         front->rb_resolved = tmp;
4798
4799         resource_unload(&back->resource);
4800         resource_unload(&front->resource);
4801     }
4802
4803     {
4804         DWORD tmp_flags = back->flags;
4805         back->flags = front->flags;
4806         front->flags = tmp_flags;
4807     }
4808 }
4809
4810 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4811  * pixel copy calls. */
4812 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4813         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4814 {
4815     struct wined3d_device *device = dst_surface->resource.device;
4816     float xrel, yrel;
4817     UINT row;
4818     struct wined3d_context *context;
4819     BOOL upsidedown = FALSE;
4820     RECT dst_rect = *dst_rect_in;
4821
4822     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4823      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4824      */
4825     if(dst_rect.top > dst_rect.bottom) {
4826         UINT tmp = dst_rect.bottom;
4827         dst_rect.bottom = dst_rect.top;
4828         dst_rect.top = tmp;
4829         upsidedown = TRUE;
4830     }
4831
4832     context = context_acquire(device, src_surface);
4833     context_apply_blit_state(context, device);
4834     surface_internal_preload(dst_surface, SRGB_RGB);
4835     ENTER_GL();
4836
4837     /* Bind the target texture */
4838     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4839     if (surface_is_offscreen(src_surface))
4840     {
4841         TRACE("Reading from an offscreen target\n");
4842         upsidedown = !upsidedown;
4843         glReadBuffer(device->offscreenBuffer);
4844     }
4845     else
4846     {
4847         glReadBuffer(surface_get_gl_buffer(src_surface));
4848     }
4849     checkGLcall("glReadBuffer");
4850
4851     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4852     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4853
4854     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4855     {
4856         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4857
4858         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4859             ERR("Texture filtering not supported in direct blit\n");
4860         }
4861     }
4862     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4863             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4864     {
4865         ERR("Texture filtering not supported in direct blit\n");
4866     }
4867
4868     if (upsidedown
4869             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4870             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4871     {
4872         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4873
4874         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4875                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4876                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4877                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4878     }
4879     else
4880     {
4881         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4882         /* I have to process this row by row to swap the image,
4883          * otherwise it would be upside down, so stretching in y direction
4884          * doesn't cost extra time
4885          *
4886          * However, stretching in x direction can be avoided if not necessary
4887          */
4888         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4889             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4890             {
4891                 /* Well, that stuff works, but it's very slow.
4892                  * find a better way instead
4893                  */
4894                 UINT col;
4895
4896                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4897                 {
4898                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4899                             dst_rect.left + col /* x offset */, row /* y offset */,
4900                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4901                 }
4902             }
4903             else
4904             {
4905                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4906                         dst_rect.left /* x offset */, row /* y offset */,
4907                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4908             }
4909         }
4910     }
4911     checkGLcall("glCopyTexSubImage2D");
4912
4913     LEAVE_GL();
4914     context_release(context);
4915
4916     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4917      * path is never entered
4918      */
4919     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4920 }
4921
4922 /* Uses the hardware to stretch and flip the image */
4923 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4924         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4925 {
4926     struct wined3d_device *device = dst_surface->resource.device;
4927     struct wined3d_swapchain *src_swapchain = NULL;
4928     GLuint src, backup = 0;
4929     float left, right, top, bottom; /* Texture coordinates */
4930     UINT fbwidth = src_surface->resource.width;
4931     UINT fbheight = src_surface->resource.height;
4932     struct wined3d_context *context;
4933     GLenum drawBuffer = GL_BACK;
4934     GLenum texture_target;
4935     BOOL noBackBufferBackup;
4936     BOOL src_offscreen;
4937     BOOL upsidedown = FALSE;
4938     RECT dst_rect = *dst_rect_in;
4939
4940     TRACE("Using hwstretch blit\n");
4941     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4942     context = context_acquire(device, src_surface);
4943     context_apply_blit_state(context, device);
4944     surface_internal_preload(dst_surface, SRGB_RGB);
4945
4946     src_offscreen = surface_is_offscreen(src_surface);
4947     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4948     if (!noBackBufferBackup && !src_surface->texture_name)
4949     {
4950         /* Get it a description */
4951         surface_internal_preload(src_surface, SRGB_RGB);
4952     }
4953     ENTER_GL();
4954
4955     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4956      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4957      */
4958     if (context->aux_buffers >= 2)
4959     {
4960         /* Got more than one aux buffer? Use the 2nd aux buffer */
4961         drawBuffer = GL_AUX1;
4962     }
4963     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4964     {
4965         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4966         drawBuffer = GL_AUX0;
4967     }
4968
4969     if(noBackBufferBackup) {
4970         glGenTextures(1, &backup);
4971         checkGLcall("glGenTextures");
4972         context_bind_texture(context, GL_TEXTURE_2D, backup);
4973         texture_target = GL_TEXTURE_2D;
4974     } else {
4975         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4976          * we are reading from the back buffer, the backup can be used as source texture
4977          */
4978         texture_target = src_surface->texture_target;
4979         context_bind_texture(context, texture_target, src_surface->texture_name);
4980         glEnable(texture_target);
4981         checkGLcall("glEnable(texture_target)");
4982
4983         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4984         src_surface->flags &= ~SFLAG_INTEXTURE;
4985     }
4986
4987     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4988      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4989      */
4990     if(dst_rect.top > dst_rect.bottom) {
4991         UINT tmp = dst_rect.bottom;
4992         dst_rect.bottom = dst_rect.top;
4993         dst_rect.top = tmp;
4994         upsidedown = TRUE;
4995     }
4996
4997     if (src_offscreen)
4998     {
4999         TRACE("Reading from an offscreen target\n");
5000         upsidedown = !upsidedown;
5001         glReadBuffer(device->offscreenBuffer);
5002     }
5003     else
5004     {
5005         glReadBuffer(surface_get_gl_buffer(src_surface));
5006     }
5007
5008     /* TODO: Only back up the part that will be overwritten */
5009     glCopyTexSubImage2D(texture_target, 0,
5010                         0, 0 /* read offsets */,
5011                         0, 0,
5012                         fbwidth,
5013                         fbheight);
5014
5015     checkGLcall("glCopyTexSubImage2D");
5016
5017     /* No issue with overriding these - the sampler is dirty due to blit usage */
5018     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5019             wined3d_gl_mag_filter(magLookup, Filter));
5020     checkGLcall("glTexParameteri");
5021     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5022             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5023     checkGLcall("glTexParameteri");
5024
5025     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5026         src_swapchain = src_surface->container.u.swapchain;
5027     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5028     {
5029         src = backup ? backup : src_surface->texture_name;
5030     }
5031     else
5032     {
5033         glReadBuffer(GL_FRONT);
5034         checkGLcall("glReadBuffer(GL_FRONT)");
5035
5036         glGenTextures(1, &src);
5037         checkGLcall("glGenTextures(1, &src)");
5038         context_bind_texture(context, GL_TEXTURE_2D, src);
5039
5040         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5041          * out for power of 2 sizes
5042          */
5043         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5044                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5045         checkGLcall("glTexImage2D");
5046         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5047                             0, 0 /* read offsets */,
5048                             0, 0,
5049                             fbwidth,
5050                             fbheight);
5051
5052         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5053         checkGLcall("glTexParameteri");
5054         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5055         checkGLcall("glTexParameteri");
5056
5057         glReadBuffer(GL_BACK);
5058         checkGLcall("glReadBuffer(GL_BACK)");
5059
5060         if(texture_target != GL_TEXTURE_2D) {
5061             glDisable(texture_target);
5062             glEnable(GL_TEXTURE_2D);
5063             texture_target = GL_TEXTURE_2D;
5064         }
5065     }
5066     checkGLcall("glEnd and previous");
5067
5068     left = src_rect->left;
5069     right = src_rect->right;
5070
5071     if (!upsidedown)
5072     {
5073         top = src_surface->resource.height - src_rect->top;
5074         bottom = src_surface->resource.height - src_rect->bottom;
5075     }
5076     else
5077     {
5078         top = src_surface->resource.height - src_rect->bottom;
5079         bottom = src_surface->resource.height - src_rect->top;
5080     }
5081
5082     if (src_surface->flags & SFLAG_NORMCOORD)
5083     {
5084         left /= src_surface->pow2Width;
5085         right /= src_surface->pow2Width;
5086         top /= src_surface->pow2Height;
5087         bottom /= src_surface->pow2Height;
5088     }
5089
5090     /* draw the source texture stretched and upside down. The correct surface is bound already */
5091     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5092     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5093
5094     context_set_draw_buffer(context, drawBuffer);
5095     glReadBuffer(drawBuffer);
5096
5097     glBegin(GL_QUADS);
5098         /* bottom left */
5099         glTexCoord2f(left, bottom);
5100         glVertex2i(0, 0);
5101
5102         /* top left */
5103         glTexCoord2f(left, top);
5104         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5105
5106         /* top right */
5107         glTexCoord2f(right, top);
5108         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5109
5110         /* bottom right */
5111         glTexCoord2f(right, bottom);
5112         glVertex2i(dst_rect.right - dst_rect.left, 0);
5113     glEnd();
5114     checkGLcall("glEnd and previous");
5115
5116     if (texture_target != dst_surface->texture_target)
5117     {
5118         glDisable(texture_target);
5119         glEnable(dst_surface->texture_target);
5120         texture_target = dst_surface->texture_target;
5121     }
5122
5123     /* Now read the stretched and upside down image into the destination texture */
5124     context_bind_texture(context, texture_target, dst_surface->texture_name);
5125     glCopyTexSubImage2D(texture_target,
5126                         0,
5127                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5128                         0, 0, /* We blitted the image to the origin */
5129                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5130     checkGLcall("glCopyTexSubImage2D");
5131
5132     if(drawBuffer == GL_BACK) {
5133         /* Write the back buffer backup back */
5134         if(backup) {
5135             if(texture_target != GL_TEXTURE_2D) {
5136                 glDisable(texture_target);
5137                 glEnable(GL_TEXTURE_2D);
5138                 texture_target = GL_TEXTURE_2D;
5139             }
5140             context_bind_texture(context, GL_TEXTURE_2D, backup);
5141         }
5142         else
5143         {
5144             if (texture_target != src_surface->texture_target)
5145             {
5146                 glDisable(texture_target);
5147                 glEnable(src_surface->texture_target);
5148                 texture_target = src_surface->texture_target;
5149             }
5150             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5151         }
5152
5153         glBegin(GL_QUADS);
5154             /* top left */
5155             glTexCoord2f(0.0f, 0.0f);
5156             glVertex2i(0, fbheight);
5157
5158             /* bottom left */
5159             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5160             glVertex2i(0, 0);
5161
5162             /* bottom right */
5163             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5164                     (float)fbheight / (float)src_surface->pow2Height);
5165             glVertex2i(fbwidth, 0);
5166
5167             /* top right */
5168             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5169             glVertex2i(fbwidth, fbheight);
5170         glEnd();
5171     }
5172     glDisable(texture_target);
5173     checkGLcall("glDisable(texture_target)");
5174
5175     /* Cleanup */
5176     if (src != src_surface->texture_name && src != backup)
5177     {
5178         glDeleteTextures(1, &src);
5179         checkGLcall("glDeleteTextures(1, &src)");
5180     }
5181     if(backup) {
5182         glDeleteTextures(1, &backup);
5183         checkGLcall("glDeleteTextures(1, &backup)");
5184     }
5185
5186     LEAVE_GL();
5187
5188     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5189
5190     context_release(context);
5191
5192     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5193      * path is never entered
5194      */
5195     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5196 }
5197
5198 /* Front buffer coordinates are always full screen coordinates, but our GL
5199  * drawable is limited to the window's client area. The sysmem and texture
5200  * copies do have the full screen size. Note that GL has a bottom-left
5201  * origin, while D3D has a top-left origin. */
5202 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5203 {
5204     UINT drawable_height;
5205
5206     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5207             && surface == surface->container.u.swapchain->front_buffer)
5208     {
5209         POINT offset = {0, 0};
5210         RECT windowsize;
5211
5212         ScreenToClient(window, &offset);
5213         OffsetRect(rect, offset.x, offset.y);
5214
5215         GetClientRect(window, &windowsize);
5216         drawable_height = windowsize.bottom - windowsize.top;
5217     }
5218     else
5219     {
5220         drawable_height = surface->resource.height;
5221     }
5222
5223     rect->top = drawable_height - rect->top;
5224     rect->bottom = drawable_height - rect->bottom;
5225 }
5226
5227 static void surface_blt_to_drawable(const struct wined3d_device *device,
5228         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5229         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5230         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5231 {
5232     struct wined3d_context *context;
5233     RECT src_rect, dst_rect;
5234
5235     src_rect = *src_rect_in;
5236     dst_rect = *dst_rect_in;
5237
5238     /* Make sure the surface is up-to-date. This should probably use
5239      * surface_load_location() and worry about the destination surface too,
5240      * unless we're overwriting it completely. */
5241     surface_internal_preload(src_surface, SRGB_RGB);
5242
5243     /* Activate the destination context, set it up for blitting */
5244     context = context_acquire(device, dst_surface);
5245     context_apply_blit_state(context, device);
5246
5247     if (!surface_is_offscreen(dst_surface))
5248         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5249
5250     device->blitter->set_shader(device->blit_priv, context, src_surface);
5251
5252     ENTER_GL();
5253
5254     if (color_key)
5255     {
5256         glEnable(GL_ALPHA_TEST);
5257         checkGLcall("glEnable(GL_ALPHA_TEST)");
5258
5259         /* When the primary render target uses P8, the alpha component
5260          * contains the palette index. Which means that the colorkey is one of
5261          * the palette entries. In other cases pixels that should be masked
5262          * away have alpha set to 0. */
5263         if (primary_render_target_is_p8(device))
5264             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5265         else
5266             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5267         checkGLcall("glAlphaFunc");
5268     }
5269     else
5270     {
5271         glDisable(GL_ALPHA_TEST);
5272         checkGLcall("glDisable(GL_ALPHA_TEST)");
5273     }
5274
5275     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5276
5277     if (color_key)
5278     {
5279         glDisable(GL_ALPHA_TEST);
5280         checkGLcall("glDisable(GL_ALPHA_TEST)");
5281     }
5282
5283     LEAVE_GL();
5284
5285     /* Leave the opengl state valid for blitting */
5286     device->blitter->unset_shader(context->gl_info);
5287
5288     if (wined3d_settings.strict_draw_ordering
5289             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5290             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5291         wglFlush(); /* Flush to ensure ordering across contexts. */
5292
5293     context_release(context);
5294 }
5295
5296 /* Do not call while under the GL lock. */
5297 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5298 {
5299     struct wined3d_device *device = s->resource.device;
5300     const struct blit_shader *blitter;
5301
5302     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5303             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5304     if (!blitter)
5305     {
5306         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5307         return WINED3DERR_INVALIDCALL;
5308     }
5309
5310     return blitter->color_fill(device, s, rect, color);
5311 }
5312
5313 /* Do not call while under the GL lock. */
5314 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5315         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5316         WINED3DTEXTUREFILTERTYPE Filter)
5317 {
5318     struct wined3d_device *device = dst_surface->resource.device;
5319     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5320     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5321
5322     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5323             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5324             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5325
5326     /* Get the swapchain. One of the surfaces has to be a primary surface */
5327     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5328     {
5329         WARN("Destination is in sysmem, rejecting gl blt\n");
5330         return WINED3DERR_INVALIDCALL;
5331     }
5332
5333     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5334         dstSwapchain = dst_surface->container.u.swapchain;
5335
5336     if (src_surface)
5337     {
5338         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5339         {
5340             WARN("Src is in sysmem, rejecting gl blt\n");
5341             return WINED3DERR_INVALIDCALL;
5342         }
5343
5344         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5345             srcSwapchain = src_surface->container.u.swapchain;
5346     }
5347
5348     /* Early sort out of cases where no render target is used */
5349     if (!dstSwapchain && !srcSwapchain
5350             && src_surface != device->fb.render_targets[0]
5351             && dst_surface != device->fb.render_targets[0])
5352     {
5353         TRACE("No surface is render target, not using hardware blit.\n");
5354         return WINED3DERR_INVALIDCALL;
5355     }
5356
5357     /* No destination color keying supported */
5358     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5359     {
5360         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5361         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5362         return WINED3DERR_INVALIDCALL;
5363     }
5364
5365     if (dstSwapchain && dstSwapchain == srcSwapchain)
5366     {
5367         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5368         return WINED3DERR_INVALIDCALL;
5369     }
5370
5371     if (dstSwapchain && srcSwapchain)
5372     {
5373         FIXME("Implement hardware blit between two different swapchains\n");
5374         return WINED3DERR_INVALIDCALL;
5375     }
5376
5377     if (dstSwapchain)
5378     {
5379         /* Handled with regular texture -> swapchain blit */
5380         if (src_surface == device->fb.render_targets[0])
5381             TRACE("Blit from active render target to a swapchain\n");
5382     }
5383     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5384     {
5385         FIXME("Implement blit from a swapchain to the active render target\n");
5386         return WINED3DERR_INVALIDCALL;
5387     }
5388
5389     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5390     {
5391         /* Blit from render target to texture */
5392         BOOL stretchx;
5393
5394         /* P8 read back is not implemented */
5395         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5396                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5397         {
5398             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5399             return WINED3DERR_INVALIDCALL;
5400         }
5401
5402         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5403         {
5404             TRACE("Color keying not supported by frame buffer to texture blit\n");
5405             return WINED3DERR_INVALIDCALL;
5406             /* Destination color key is checked above */
5407         }
5408
5409         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5410             stretchx = TRUE;
5411         else
5412             stretchx = FALSE;
5413
5414         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5415          * flip the image nor scale it.
5416          *
5417          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5418          * -> If the app wants a image width an unscaled width, copy it line per line
5419          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5420          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5421          *    back buffer. This is slower than reading line per line, thus not used for flipping
5422          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5423          *    pixel by pixel. */
5424         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5425                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5426         {
5427             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5428             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5429         } else {
5430             TRACE("Using hardware stretching to flip / stretch the texture\n");
5431             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5432         }
5433
5434         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5435         {
5436             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5437             dst_surface->resource.allocatedMemory = NULL;
5438             dst_surface->resource.heapMemory = NULL;
5439         }
5440         else
5441         {
5442             dst_surface->flags &= ~SFLAG_INSYSMEM;
5443         }
5444
5445         return WINED3D_OK;
5446     }
5447     else if (src_surface)
5448     {
5449         /* Blit from offscreen surface to render target */
5450         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5451         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5452
5453         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5454
5455         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5456                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5457                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5458         {
5459             FIXME("Unsupported blit operation falling back to software\n");
5460             return WINED3DERR_INVALIDCALL;
5461         }
5462
5463         /* Color keying: Check if we have to do a color keyed blt,
5464          * and if not check if a color key is activated.
5465          *
5466          * Just modify the color keying parameters in the surface and restore them afterwards
5467          * The surface keeps track of the color key last used to load the opengl surface.
5468          * PreLoad will catch the change to the flags and color key and reload if necessary.
5469          */
5470         if (flags & WINEDDBLT_KEYSRC)
5471         {
5472             /* Use color key from surface */
5473         }
5474         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5475         {
5476             /* Use color key from DDBltFx */
5477             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5478             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5479         }
5480         else
5481         {
5482             /* Do not use color key */
5483             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5484         }
5485
5486         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5487                 src_surface, src_rect, dst_surface, dst_rect);
5488
5489         /* Restore the color key parameters */
5490         src_surface->CKeyFlags = oldCKeyFlags;
5491         src_surface->src_blt_color_key = old_blt_key;
5492
5493         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5494
5495         return WINED3D_OK;
5496     }
5497
5498     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5499     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5500     return WINED3DERR_INVALIDCALL;
5501 }
5502
5503 /* GL locking is done by the caller */
5504 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5505         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5506 {
5507     struct wined3d_device *device = surface->resource.device;
5508     const struct wined3d_gl_info *gl_info = context->gl_info;
5509     GLint compare_mode = GL_NONE;
5510     struct blt_info info;
5511     GLint old_binding = 0;
5512     RECT rect;
5513
5514     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5515
5516     glDisable(GL_CULL_FACE);
5517     glDisable(GL_BLEND);
5518     glDisable(GL_ALPHA_TEST);
5519     glDisable(GL_SCISSOR_TEST);
5520     glDisable(GL_STENCIL_TEST);
5521     glEnable(GL_DEPTH_TEST);
5522     glDepthFunc(GL_ALWAYS);
5523     glDepthMask(GL_TRUE);
5524     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5525     glViewport(x, y, w, h);
5526
5527     SetRect(&rect, 0, h, w, 0);
5528     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5529     context_active_texture(context, context->gl_info, 0);
5530     glGetIntegerv(info.binding, &old_binding);
5531     glBindTexture(info.bind_target, texture);
5532     if (gl_info->supported[ARB_SHADOW])
5533     {
5534         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5535         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5536     }
5537
5538     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5539             gl_info, info.tex_type, &surface->ds_current_size);
5540
5541     glBegin(GL_TRIANGLE_STRIP);
5542     glTexCoord3fv(info.coords[0]);
5543     glVertex2f(-1.0f, -1.0f);
5544     glTexCoord3fv(info.coords[1]);
5545     glVertex2f(1.0f, -1.0f);
5546     glTexCoord3fv(info.coords[2]);
5547     glVertex2f(-1.0f, 1.0f);
5548     glTexCoord3fv(info.coords[3]);
5549     glVertex2f(1.0f, 1.0f);
5550     glEnd();
5551
5552     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5553     glBindTexture(info.bind_target, old_binding);
5554
5555     glPopAttrib();
5556
5557     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5558 }
5559
5560 void surface_modify_ds_location(struct wined3d_surface *surface,
5561         DWORD location, UINT w, UINT h)
5562 {
5563     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5564
5565     if (location & ~SFLAG_LOCATIONS)
5566         FIXME("Invalid location (%#x) specified.\n", location);
5567
5568     if (!(surface->flags & SFLAG_ALLOCATED))
5569         location &= ~SFLAG_INTEXTURE;
5570     if (!(surface->rb_resolved))
5571         location &= ~SFLAG_INRB_RESOLVED;
5572     if (!(surface->rb_multisample))
5573         location &= ~SFLAG_INRB_MULTISAMPLE;
5574
5575     surface->ds_current_size.cx = w;
5576     surface->ds_current_size.cy = h;
5577     surface->flags &= ~SFLAG_LOCATIONS;
5578     surface->flags |= location;
5579 }
5580
5581 /* Context activation is done by the caller. */
5582 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5583 {
5584     struct wined3d_device *device = surface->resource.device;
5585     GLsizei w, h;
5586
5587     TRACE("surface %p, new location %#x.\n", surface, location);
5588
5589     /* TODO: Make this work for modes other than FBO */
5590     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5591
5592     if (!(surface->flags & location))
5593     {
5594         w = surface->ds_current_size.cx;
5595         h = surface->ds_current_size.cy;
5596         surface->ds_current_size.cx = 0;
5597         surface->ds_current_size.cy = 0;
5598     }
5599     else
5600     {
5601         w = surface->resource.width;
5602         h = surface->resource.height;
5603     }
5604
5605     if (surface->ds_current_size.cx == surface->resource.width
5606             && surface->ds_current_size.cy == surface->resource.height)
5607     {
5608         TRACE("Location (%#x) is already up to date.\n", location);
5609         return;
5610     }
5611
5612     if (surface->current_renderbuffer)
5613     {
5614         FIXME("Not supported with fixed up depth stencil.\n");
5615         return;
5616     }
5617
5618     if (!(surface->flags & SFLAG_LOCATIONS))
5619     {
5620         /* This mostly happens when a depth / stencil is used without being
5621          * cleared first. In principle we could upload from sysmem, or
5622          * explicitly clear before first usage. For the moment there don't
5623          * appear to be a lot of applications depending on this, so a FIXME
5624          * should do. */
5625         FIXME("No up to date depth stencil location.\n");
5626         surface->flags |= location;
5627         surface->ds_current_size.cx = surface->resource.width;
5628         surface->ds_current_size.cy = surface->resource.height;
5629         return;
5630     }
5631
5632     if (location == SFLAG_INTEXTURE)
5633     {
5634         GLint old_binding = 0;
5635         GLenum bind_target;
5636
5637         /* The render target is allowed to be smaller than the depth/stencil
5638          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5639          * than the offscreen surface. Don't overwrite the offscreen surface
5640          * with undefined data. */
5641         w = min(w, context->swapchain->desc.backbuffer_width);
5642         h = min(h, context->swapchain->desc.backbuffer_height);
5643
5644         TRACE("Copying onscreen depth buffer to depth texture.\n");
5645
5646         ENTER_GL();
5647
5648         if (!device->depth_blt_texture)
5649         {
5650             glGenTextures(1, &device->depth_blt_texture);
5651         }
5652
5653         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5654          * directly on the FBO texture. That's because we need to flip. */
5655         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5656                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5657         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5658         {
5659             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5660             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5661         }
5662         else
5663         {
5664             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5665             bind_target = GL_TEXTURE_2D;
5666         }
5667         glBindTexture(bind_target, device->depth_blt_texture);
5668         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5669          * internal format, because the internal format might include stencil
5670          * data. In principle we should copy stencil data as well, but unless
5671          * the driver supports stencil export it's hard to do, and doesn't
5672          * seem to be needed in practice. If the hardware doesn't support
5673          * writing stencil data, the glCopyTexImage2D() call might trigger
5674          * software fallbacks. */
5675         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5676         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5677         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5678         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5679         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5680         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5681         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5682         glBindTexture(bind_target, old_binding);
5683
5684         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5685                 NULL, surface, SFLAG_INTEXTURE);
5686         context_set_draw_buffer(context, GL_NONE);
5687         glReadBuffer(GL_NONE);
5688
5689         /* Do the actual blit */
5690         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5691         checkGLcall("depth_blt");
5692
5693         context_invalidate_state(context, STATE_FRAMEBUFFER);
5694
5695         LEAVE_GL();
5696
5697         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5698     }
5699     else if (location == SFLAG_INDRAWABLE)
5700     {
5701         TRACE("Copying depth texture to onscreen depth buffer.\n");
5702
5703         ENTER_GL();
5704
5705         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5706                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5707         surface_depth_blt(surface, context, surface->texture_name,
5708                 0, surface->pow2Height - h, w, h, surface->texture_target);
5709         checkGLcall("depth_blt");
5710
5711         context_invalidate_state(context, STATE_FRAMEBUFFER);
5712
5713         LEAVE_GL();
5714
5715         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5716     }
5717     else
5718     {
5719         ERR("Invalid location (%#x) specified.\n", location);
5720     }
5721
5722     surface->flags |= location;
5723     surface->ds_current_size.cx = surface->resource.width;
5724     surface->ds_current_size.cy = surface->resource.height;
5725 }
5726
5727 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5728 {
5729     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5730     struct wined3d_surface *overlay;
5731
5732     TRACE("surface %p, location %s, persistent %#x.\n",
5733             surface, debug_surflocation(location), persistent);
5734
5735     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5736             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5737             && (location & SFLAG_INDRAWABLE))
5738         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5739
5740     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5741             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5742         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5743
5744     if (persistent)
5745     {
5746         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5747                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5748         {
5749             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5750             {
5751                 TRACE("Passing to container.\n");
5752                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5753             }
5754         }
5755         surface->flags &= ~SFLAG_LOCATIONS;
5756         surface->flags |= location;
5757
5758         /* Redraw emulated overlays, if any */
5759         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5760         {
5761             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5762             {
5763                 surface_draw_overlay(overlay);
5764             }
5765         }
5766     }
5767     else
5768     {
5769         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5770         {
5771             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5772             {
5773                 TRACE("Passing to container\n");
5774                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5775             }
5776         }
5777         surface->flags &= ~location;
5778     }
5779
5780     if (!(surface->flags & SFLAG_LOCATIONS))
5781     {
5782         ERR("Surface %p does not have any up to date location.\n", surface);
5783     }
5784 }
5785
5786 static DWORD resource_access_from_location(DWORD location)
5787 {
5788     switch (location)
5789     {
5790         case SFLAG_INSYSMEM:
5791             return WINED3D_RESOURCE_ACCESS_CPU;
5792
5793         case SFLAG_INDRAWABLE:
5794         case SFLAG_INSRGBTEX:
5795         case SFLAG_INTEXTURE:
5796         case SFLAG_INRB_MULTISAMPLE:
5797         case SFLAG_INRB_RESOLVED:
5798             return WINED3D_RESOURCE_ACCESS_GPU;
5799
5800         default:
5801             FIXME("Unhandled location %#x.\n", location);
5802             return 0;
5803     }
5804 }
5805
5806 static void surface_load_sysmem(struct wined3d_surface *surface,
5807         const struct wined3d_gl_info *gl_info, const RECT *rect)
5808 {
5809     surface_prepare_system_memory(surface);
5810
5811     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5812         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5813
5814     /* Download the surface to system memory. */
5815     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5816     {
5817         struct wined3d_device *device = surface->resource.device;
5818         struct wined3d_context *context;
5819
5820         /* TODO: Use already acquired context when possible. */
5821         context = context_acquire(device, NULL);
5822
5823         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5824         surface_download_data(surface, gl_info);
5825
5826         context_release(context);
5827
5828         return;
5829     }
5830
5831     if (surface->flags & SFLAG_INDRAWABLE)
5832     {
5833         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5834                 wined3d_surface_get_pitch(surface));
5835         return;
5836     }
5837
5838     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5839             surface, surface->flags & SFLAG_LOCATIONS);
5840 }
5841
5842 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5843         const struct wined3d_gl_info *gl_info, const RECT *rect)
5844 {
5845     struct wined3d_device *device = surface->resource.device;
5846     struct wined3d_format format;
5847     CONVERT_TYPES convert;
5848     UINT byte_count;
5849     BYTE *mem;
5850
5851     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5852     {
5853         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5854         return WINED3DERR_INVALIDCALL;
5855     }
5856
5857     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5858         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5859
5860     if (surface->flags & SFLAG_INTEXTURE)
5861     {
5862         RECT r;
5863
5864         surface_get_rect(surface, rect, &r);
5865         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5866
5867         return WINED3D_OK;
5868     }
5869
5870     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5871     {
5872         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5873          * path through sysmem. */
5874         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5875     }
5876
5877     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5878
5879     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5880      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5881      * called. */
5882     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5883     {
5884         struct wined3d_context *context;
5885
5886         TRACE("Removing the pbo attached to surface %p.\n", surface);
5887
5888         /* TODO: Use already acquired context when possible. */
5889         context = context_acquire(device, NULL);
5890
5891         surface_remove_pbo(surface, gl_info);
5892
5893         context_release(context);
5894     }
5895
5896     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5897     {
5898         UINT height = surface->resource.height;
5899         UINT width = surface->resource.width;
5900         UINT src_pitch, dst_pitch;
5901
5902         byte_count = format.conv_byte_count;
5903         src_pitch = wined3d_surface_get_pitch(surface);
5904
5905         /* Stick to the alignment for the converted surface too, makes it
5906          * easier to load the surface. */
5907         dst_pitch = width * byte_count;
5908         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5909
5910         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5911         {
5912             ERR("Out of memory (%u).\n", dst_pitch * height);
5913             return E_OUTOFMEMORY;
5914         }
5915
5916         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5917                 src_pitch, width, height, dst_pitch, convert, surface);
5918
5919         surface->flags |= SFLAG_CONVERTED;
5920     }
5921     else
5922     {
5923         surface->flags &= ~SFLAG_CONVERTED;
5924         mem = surface->resource.allocatedMemory;
5925         byte_count = format.byte_count;
5926     }
5927
5928     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5929
5930     /* Don't delete PBO memory. */
5931     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5932         HeapFree(GetProcessHeap(), 0, mem);
5933
5934     return WINED3D_OK;
5935 }
5936
5937 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5938         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5939 {
5940     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5941     struct wined3d_device *device = surface->resource.device;
5942     struct wined3d_context *context;
5943     UINT width, src_pitch, dst_pitch;
5944     struct wined3d_bo_address data;
5945     struct wined3d_format format;
5946     POINT dst_point = {0, 0};
5947     CONVERT_TYPES convert;
5948     BYTE *mem;
5949
5950     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5951             && surface_is_offscreen(surface)
5952             && (surface->flags & SFLAG_INDRAWABLE))
5953     {
5954         surface_load_fb_texture(surface, srgb);
5955
5956         return WINED3D_OK;
5957     }
5958
5959     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5960             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
5961             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5962                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5963                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5964     {
5965         if (srgb)
5966             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5967                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5968         else
5969             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5970                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5971
5972         return WINED3D_OK;
5973     }
5974
5975     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
5976             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
5977             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5978                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5979                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5980     {
5981         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
5982         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
5983         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
5984
5985         surface_blt_fbo(device, WINED3DTEXF_POINT, surface, src_location,
5986                 &rect, surface, dst_location, &rect);
5987
5988         return WINED3D_OK;
5989     }
5990
5991     /* Upload from system memory */
5992
5993     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
5994             TRUE /* We will use textures */, &format, &convert);
5995
5996     if (srgb)
5997     {
5998         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
5999         {
6000             /* Performance warning... */
6001             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6002             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6003         }
6004     }
6005     else
6006     {
6007         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6008         {
6009             /* Performance warning... */
6010             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6011             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6012         }
6013     }
6014
6015     if (!(surface->flags & SFLAG_INSYSMEM))
6016     {
6017         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6018         /* Lets hope we get it from somewhere... */
6019         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6020     }
6021
6022     /* TODO: Use already acquired context when possible. */
6023     context = context_acquire(device, NULL);
6024
6025     surface_prepare_texture(surface, context, srgb);
6026     surface_bind_and_dirtify(surface, context, srgb);
6027
6028     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6029     {
6030         surface->flags |= SFLAG_GLCKEY;
6031         surface->gl_color_key = surface->src_blt_color_key;
6032     }
6033     else surface->flags &= ~SFLAG_GLCKEY;
6034
6035     width = surface->resource.width;
6036     src_pitch = wined3d_surface_get_pitch(surface);
6037
6038     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6039      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6040      * called. */
6041     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6042     {
6043         TRACE("Removing the pbo attached to surface %p.\n", surface);
6044         surface_remove_pbo(surface, gl_info);
6045     }
6046
6047     if (format.convert)
6048     {
6049         /* This code is entered for texture formats which need a fixup. */
6050         UINT height = surface->resource.height;
6051
6052         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6053         dst_pitch = width * format.conv_byte_count;
6054         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6055
6056         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6057         {
6058             ERR("Out of memory (%u).\n", dst_pitch * height);
6059             context_release(context);
6060             return E_OUTOFMEMORY;
6061         }
6062         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6063         format.byte_count = format.conv_byte_count;
6064         src_pitch = dst_pitch;
6065     }
6066     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6067     {
6068         /* This code is only entered for color keying fixups */
6069         UINT height = surface->resource.height;
6070
6071         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6072         dst_pitch = width * format.conv_byte_count;
6073         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6074
6075         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6076         {
6077             ERR("Out of memory (%u).\n", dst_pitch * height);
6078             context_release(context);
6079             return E_OUTOFMEMORY;
6080         }
6081         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6082                 width, height, dst_pitch, convert, surface);
6083         format.byte_count = format.conv_byte_count;
6084         src_pitch = dst_pitch;
6085     }
6086     else
6087     {
6088         mem = surface->resource.allocatedMemory;
6089     }
6090
6091     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6092     data.addr = mem;
6093     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6094
6095     context_release(context);
6096
6097     /* Don't delete PBO memory. */
6098     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6099         HeapFree(GetProcessHeap(), 0, mem);
6100
6101     return WINED3D_OK;
6102 }
6103
6104 static void surface_multisample_resolve(struct wined3d_surface *surface)
6105 {
6106     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6107
6108     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6109         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6110
6111     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6112             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6113 }
6114
6115 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6116 {
6117     struct wined3d_device *device = surface->resource.device;
6118     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6119     HRESULT hr;
6120
6121     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6122
6123     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6124     {
6125         if (location == SFLAG_INTEXTURE)
6126         {
6127             struct wined3d_context *context = context_acquire(device, NULL);
6128             surface_load_ds_location(surface, context, location);
6129             context_release(context);
6130             return WINED3D_OK;
6131         }
6132         else
6133         {
6134             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6135             return WINED3DERR_INVALIDCALL;
6136         }
6137     }
6138
6139     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6140         location = SFLAG_INTEXTURE;
6141
6142     if (surface->flags & location)
6143     {
6144         TRACE("Location already up to date.\n");
6145
6146         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6147                 && surface_need_pbo(surface, gl_info))
6148             surface_load_pbo(surface, gl_info);
6149
6150         return WINED3D_OK;
6151     }
6152
6153     if (WARN_ON(d3d_surface))
6154     {
6155         DWORD required_access = resource_access_from_location(location);
6156         if ((surface->resource.access_flags & required_access) != required_access)
6157             WARN("Operation requires %#x access, but surface only has %#x.\n",
6158                     required_access, surface->resource.access_flags);
6159     }
6160
6161     if (!(surface->flags & SFLAG_LOCATIONS))
6162     {
6163         ERR("Surface %p does not have any up to date location.\n", surface);
6164         surface->flags |= SFLAG_LOST;
6165         return WINED3DERR_DEVICELOST;
6166     }
6167
6168     switch (location)
6169     {
6170         case SFLAG_INSYSMEM:
6171             surface_load_sysmem(surface, gl_info, rect);
6172             break;
6173
6174         case SFLAG_INDRAWABLE:
6175             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6176                 return hr;
6177             break;
6178
6179         case SFLAG_INRB_RESOLVED:
6180             surface_multisample_resolve(surface);
6181             break;
6182
6183         case SFLAG_INTEXTURE:
6184         case SFLAG_INSRGBTEX:
6185             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6186                 return hr;
6187             break;
6188
6189         default:
6190             ERR("Don't know how to handle location %#x.\n", location);
6191             break;
6192     }
6193
6194     if (!rect)
6195     {
6196         surface->flags |= location;
6197
6198         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6199             surface_evict_sysmem(surface);
6200     }
6201
6202     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6203             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6204     {
6205         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6206     }
6207
6208     return WINED3D_OK;
6209 }
6210
6211 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6212 {
6213     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6214
6215     /* Not on a swapchain - must be offscreen */
6216     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6217
6218     /* The front buffer is always onscreen */
6219     if (surface == swapchain->front_buffer) return FALSE;
6220
6221     /* If the swapchain is rendered to an FBO, the backbuffer is
6222      * offscreen, otherwise onscreen */
6223     return swapchain->render_to_fbo;
6224 }
6225
6226 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); there is nothing to release.
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6229
6230 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6231 /* Context activation is done by the caller. */
6232 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6233 {
6234     BYTE table[256][4];
6235     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6236
6237     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6238
6239     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6240     ENTER_GL();
6241     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6242     LEAVE_GL();
6243 }
6244
6245 /* Context activation is done by the caller. */
6246 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6247 {
6248     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6249
6250     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6251      * else the surface is converted in software at upload time in LoadLocation.
6252      */
6253     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6254             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6255         ffp_blit_p8_upload_palette(surface, context->gl_info);
6256
6257     ENTER_GL();
6258     glEnable(surface->texture_target);
6259     checkGLcall("glEnable(surface->texture_target)");
6260     LEAVE_GL();
6261     return WINED3D_OK;
6262 }
6263
6264 /* Context activation is done by the caller. */
6265 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6266 {
6267     ENTER_GL();
6268     glDisable(GL_TEXTURE_2D);
6269     checkGLcall("glDisable(GL_TEXTURE_2D)");
6270     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6271     {
6272         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6273         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6274     }
6275     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6276     {
6277         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6278         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6279     }
6280     LEAVE_GL();
6281 }
6282
6283 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6284         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6285         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6286 {
6287     enum complex_fixup src_fixup;
6288
6289     switch (blit_op)
6290     {
6291         case WINED3D_BLIT_OP_COLOR_BLIT:
6292             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6293                 return FALSE;
6294
6295             src_fixup = get_complex_fixup(src_format->color_fixup);
6296             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6297             {
6298                 TRACE("Checking support for fixup:\n");
6299                 dump_color_fixup_desc(src_format->color_fixup);
6300             }
6301
6302             if (!is_identity_fixup(dst_format->color_fixup))
6303             {
6304                 TRACE("Destination fixups are not supported\n");
6305                 return FALSE;
6306             }
6307
6308             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6309             {
6310                 TRACE("P8 fixup supported\n");
6311                 return TRUE;
6312             }
6313
6314             /* We only support identity conversions. */
6315             if (is_identity_fixup(src_format->color_fixup))
6316             {
6317                 TRACE("[OK]\n");
6318                 return TRUE;
6319             }
6320
6321             TRACE("[FAILED]\n");
6322             return FALSE;
6323
6324         case WINED3D_BLIT_OP_COLOR_FILL:
6325             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6326                 return FALSE;
6327
6328             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6329             {
6330                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6331                     return FALSE;
6332             }
6333             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6334             {
6335                 TRACE("Color fill not supported\n");
6336                 return FALSE;
6337             }
6338
6339             /* FIXME: We should reject color fills on formats with fixups,
6340              * but this would break P8 color fills for example. */
6341
6342             return TRUE;
6343
6344         case WINED3D_BLIT_OP_DEPTH_FILL:
6345             return TRUE;
6346
6347         default:
6348             TRACE("Unsupported blit_op=%d\n", blit_op);
6349             return FALSE;
6350     }
6351 }
6352
6353 /* Do not call while under the GL lock. */
6354 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6355         const RECT *dst_rect, const struct wined3d_color *color)
6356 {
6357     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6358     struct wined3d_fb_state fb = {&dst_surface, NULL};
6359
6360     return device_clear_render_targets(device, 1, &fb,
6361             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6362 }
6363
6364 /* Do not call while under the GL lock. */
6365 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6366         struct wined3d_surface *surface, const RECT *rect, float depth)
6367 {
6368     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6369     struct wined3d_fb_state fb = {NULL, surface};
6370
6371     return device_clear_render_targets(device, 0, &fb,
6372             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6373 }
6374
/* Fixed-function blitter: the blit_shader entry table populated with the
 * ffp_blit_*() implementations. The initializers are positional, so their
 * order must match the member order of struct blit_shader. */
const struct blit_shader ffp_blit =  {
    ffp_blit_alloc,
    ffp_blit_free,
    ffp_blit_set,
    ffp_blit_unset,
    ffp_blit_supported,
    ffp_blit_color_fill,
    ffp_blit_depth_fill,
};
6384
/* The CPU blitter allocates nothing: this is a no-op that ignores "device"
 * and always returns WINED3D_OK. */
static HRESULT cpu_blit_alloc(struct wined3d_device *device)
{
    return WINED3D_OK;
}
6389
/* Counterpart of cpu_blit_alloc(); intentionally empty, "device" is unused.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6394
/* No-op for the CPU blitter: none of the arguments are used and the
 * function always returns WINED3D_OK.
 *
 * Context activation is done by the caller. */
static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
{
    return WINED3D_OK;
}
6400
/* Counterpart of cpu_blit_set(); intentionally empty, "gl_info" is unused.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6405
6406 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6407         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6408         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6409 {
6410     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6411     {
6412         return TRUE;
6413     }
6414
6415     return FALSE;
6416 }
6417
6418 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6419         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6420         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6421 {
6422     UINT row_block_count;
6423     const BYTE *src_row;
6424     BYTE *dst_row;
6425     UINT x, y;
6426
6427     src_row = src_data;
6428     dst_row = dst_data;
6429
6430     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6431
6432     if (!flags)
6433     {
6434         for (y = 0; y < update_h; y += format->block_height)
6435         {
6436             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6437             src_row += src_pitch;
6438             dst_row += dst_pitch;
6439         }
6440
6441         return WINED3D_OK;
6442     }
6443
6444     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6445     {
6446         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6447
6448         switch (format->id)
6449         {
6450             case WINED3DFMT_DXT1:
6451                 for (y = 0; y < update_h; y += format->block_height)
6452                 {
6453                     struct block
6454                     {
6455                         WORD color[2];
6456                         BYTE control_row[4];
6457                     };
6458
6459                     const struct block *s = (const struct block *)src_row;
6460                     struct block *d = (struct block *)dst_row;
6461
6462                     for (x = 0; x < row_block_count; ++x)
6463                     {
6464                         d[x].color[0] = s[x].color[0];
6465                         d[x].color[1] = s[x].color[1];
6466                         d[x].control_row[0] = s[x].control_row[3];
6467                         d[x].control_row[1] = s[x].control_row[2];
6468                         d[x].control_row[2] = s[x].control_row[1];
6469                         d[x].control_row[3] = s[x].control_row[0];
6470                     }
6471                     src_row -= src_pitch;
6472                     dst_row += dst_pitch;
6473                 }
6474                 return WINED3D_OK;
6475
6476             case WINED3DFMT_DXT3:
6477                 for (y = 0; y < update_h; y += format->block_height)
6478                 {
6479                     struct block
6480                     {
6481                         WORD alpha_row[4];
6482                         WORD color[2];
6483                         BYTE control_row[4];
6484                     };
6485
6486                     const struct block *s = (const struct block *)src_row;
6487                     struct block *d = (struct block *)dst_row;
6488
6489                     for (x = 0; x < row_block_count; ++x)
6490                     {
6491                         d[x].alpha_row[0] = s[x].alpha_row[3];
6492                         d[x].alpha_row[1] = s[x].alpha_row[2];
6493                         d[x].alpha_row[2] = s[x].alpha_row[1];
6494                         d[x].alpha_row[3] = s[x].alpha_row[0];
6495                         d[x].color[0] = s[x].color[0];
6496                         d[x].color[1] = s[x].color[1];
6497                         d[x].control_row[0] = s[x].control_row[3];
6498                         d[x].control_row[1] = s[x].control_row[2];
6499                         d[x].control_row[2] = s[x].control_row[1];
6500                         d[x].control_row[3] = s[x].control_row[0];
6501                     }
6502                     src_row -= src_pitch;
6503                     dst_row += dst_pitch;
6504                 }
6505                 return WINED3D_OK;
6506
6507             default:
6508                 FIXME("Compressed flip not implemented for format %s.\n",
6509                         debug_d3dformat(format->id));
6510                 return E_NOTIMPL;
6511         }
6512     }
6513
6514     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6515             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6516
6517     return E_NOTIMPL;
6518 }
6519
6520 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6521         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6522         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6523 {
6524     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6525     const struct wined3d_format *src_format, *dst_format;
6526     struct wined3d_surface *orig_src = src_surface;
6527     struct wined3d_mapped_rect dst_map, src_map;
6528     HRESULT hr = WINED3D_OK;
6529     const BYTE *sbuf;
6530     RECT xdst,xsrc;
6531     BYTE *dbuf;
6532     int x, y;
6533
6534     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6535             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6536             flags, fx, debug_d3dtexturefiltertype(filter));
6537
6538     xsrc = *src_rect;
6539
6540     if (!src_surface)
6541     {
6542         RECT full_rect;
6543
6544         full_rect.left = 0;
6545         full_rect.top = 0;
6546         full_rect.right = dst_surface->resource.width;
6547         full_rect.bottom = dst_surface->resource.height;
6548         IntersectRect(&xdst, &full_rect, dst_rect);
6549     }
6550     else
6551     {
6552         BOOL clip_horiz, clip_vert;
6553
6554         xdst = *dst_rect;
6555         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6556         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6557
6558         if (clip_vert || clip_horiz)
6559         {
6560             /* Now check if this is a special case or not... */
6561             if ((flags & WINEDDBLT_DDFX)
6562                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6563                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6564             {
6565                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6566                 return WINED3D_OK;
6567             }
6568
6569             if (clip_horiz)
6570             {
6571                 if (xdst.left < 0)
6572                 {
6573                     xsrc.left -= xdst.left;
6574                     xdst.left = 0;
6575                 }
6576                 if (xdst.right > dst_surface->resource.width)
6577                 {
6578                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6579                     xdst.right = (int)dst_surface->resource.width;
6580                 }
6581             }
6582
6583             if (clip_vert)
6584             {
6585                 if (xdst.top < 0)
6586                 {
6587                     xsrc.top -= xdst.top;
6588                     xdst.top = 0;
6589                 }
6590                 if (xdst.bottom > dst_surface->resource.height)
6591                 {
6592                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6593                     xdst.bottom = (int)dst_surface->resource.height;
6594                 }
6595             }
6596
6597             /* And check if after clipping something is still to be done... */
6598             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6599                     || (xdst.left >= (int)dst_surface->resource.width)
6600                     || (xdst.top >= (int)dst_surface->resource.height)
6601                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6602                     || (xsrc.left >= (int)src_surface->resource.width)
6603                     || (xsrc.top >= (int)src_surface->resource.height))
6604             {
6605                 TRACE("Nothing to be done after clipping.\n");
6606                 return WINED3D_OK;
6607             }
6608         }
6609     }
6610
6611     if (src_surface == dst_surface)
6612     {
6613         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6614         src_map = dst_map;
6615         src_format = dst_surface->resource.format;
6616         dst_format = src_format;
6617     }
6618     else
6619     {
6620         dst_format = dst_surface->resource.format;
6621         if (src_surface)
6622         {
6623             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6624             {
6625                 src_surface = surface_convert_format(src_surface, dst_format->id);
6626                 if (!src_surface)
6627                 {
6628                     /* The conv function writes a FIXME */
6629                     WARN("Cannot convert source surface format to dest format.\n");
6630                     goto release;
6631                 }
6632             }
6633             wined3d_surface_map(src_surface, &src_map, NULL, WINED3DLOCK_READONLY);
6634             src_format = src_surface->resource.format;
6635         }
6636         else
6637         {
6638             src_format = dst_format;
6639         }
6640         if (dst_rect)
6641             wined3d_surface_map(dst_surface, &dst_map, &xdst, 0);
6642         else
6643             wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6644     }
6645
6646     bpp = dst_surface->resource.format->byte_count;
6647     srcheight = xsrc.bottom - xsrc.top;
6648     srcwidth = xsrc.right - xsrc.left;
6649     dstheight = xdst.bottom - xdst.top;
6650     dstwidth = xdst.right - xdst.left;
6651     width = (xdst.right - xdst.left) * bpp;
6652
6653     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6654     {
6655         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6656
6657         if (src_surface == dst_surface)
6658         {
6659             FIXME("Only plain blits supported on compressed surfaces.\n");
6660             hr = E_NOTIMPL;
6661             goto release;
6662         }
6663
6664         if (srcheight != dstheight || srcwidth != dstwidth)
6665         {
6666             WARN("Stretching not supported on compressed surfaces.\n");
6667             hr = WINED3DERR_INVALIDCALL;
6668             goto release;
6669         }
6670
6671         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6672         {
6673             WARN("Rectangle not block-aligned.\n");
6674             hr = WINED3DERR_INVALIDCALL;
6675             goto release;
6676         }
6677
6678         hr = surface_cpu_blt_compressed(src_map.data, dst_map.data,
6679                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6680                 src_format, flags, fx);
6681         goto release;
6682     }
6683
6684     if (dst_rect && src_surface != dst_surface)
6685         dbuf = dst_map.data;
6686     else
6687         dbuf = (BYTE *)dst_map.data + (xdst.top * dst_map.row_pitch) + (xdst.left * bpp);
6688
6689     /* First, all the 'source-less' blits */
6690     if (flags & WINEDDBLT_COLORFILL)
6691     {
6692         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6693         flags &= ~WINEDDBLT_COLORFILL;
6694     }
6695
6696     if (flags & WINEDDBLT_DEPTHFILL)
6697     {
6698         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6699     }
6700     if (flags & WINEDDBLT_ROP)
6701     {
6702         /* Catch some degenerate cases here. */
6703         switch (fx->dwROP)
6704         {
6705             case BLACKNESS:
6706                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6707                 break;
6708             case 0xAA0029: /* No-op */
6709                 break;
6710             case WHITENESS:
6711                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6712                 break;
6713             case SRCCOPY: /* Well, we do that below? */
6714                 break;
6715             default:
6716                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6717                 goto error;
6718         }
6719         flags &= ~WINEDDBLT_ROP;
6720     }
6721     if (flags & WINEDDBLT_DDROPS)
6722     {
6723         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6724     }
6725     /* Now the 'with source' blits. */
6726     if (src_surface)
6727     {
6728         const BYTE *sbase;
6729         int sx, xinc, sy, yinc;
6730
6731         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6732             goto release;
6733
6734         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6735                 && (srcwidth != dstwidth || srcheight != dstheight))
6736         {
6737             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6738             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6739         }
6740
6741         sbase = (BYTE *)src_map.data + (xsrc.top * src_map.row_pitch) + xsrc.left * bpp;
6742         xinc = (srcwidth << 16) / dstwidth;
6743         yinc = (srcheight << 16) / dstheight;
6744
6745         if (!flags)
6746         {
6747             /* No effects, we can cheat here. */
6748             if (dstwidth == srcwidth)
6749             {
6750                 if (dstheight == srcheight)
6751                 {
6752                     /* No stretching in either direction. This needs to be as
6753                      * fast as possible. */
6754                     sbuf = sbase;
6755
6756                     /* Check for overlapping surfaces. */
6757                     if (src_surface != dst_surface || xdst.top < xsrc.top
6758                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6759                     {
6760                         /* No overlap, or dst above src, so copy from top downwards. */
6761                         for (y = 0; y < dstheight; ++y)
6762                         {
6763                             memcpy(dbuf, sbuf, width);
6764                             sbuf += src_map.row_pitch;
6765                             dbuf += dst_map.row_pitch;
6766                         }
6767                     }
6768                     else if (xdst.top > xsrc.top)
6769                     {
6770                         /* Copy from bottom upwards. */
6771                         sbuf += src_map.row_pitch * dstheight;
6772                         dbuf += dst_map.row_pitch * dstheight;
6773                         for (y = 0; y < dstheight; ++y)
6774                         {
6775                             sbuf -= src_map.row_pitch;
6776                             dbuf -= dst_map.row_pitch;
6777                             memcpy(dbuf, sbuf, width);
6778                         }
6779                     }
6780                     else
6781                     {
6782                         /* Src and dst overlapping on the same line, use memmove. */
6783                         for (y = 0; y < dstheight; ++y)
6784                         {
6785                             memmove(dbuf, sbuf, width);
6786                             sbuf += src_map.row_pitch;
6787                             dbuf += dst_map.row_pitch;
6788                         }
6789                     }
6790                 }
6791                 else
6792                 {
6793                     /* Stretching in y direction only. */
6794                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6795                     {
6796                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6797                         memcpy(dbuf, sbuf, width);
6798                         dbuf += dst_map.row_pitch;
6799                     }
6800                 }
6801             }
6802             else
6803             {
6804                 /* Stretching in X direction. */
6805                 int last_sy = -1;
6806                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6807                 {
6808                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6809
6810                     if ((sy >> 16) == (last_sy >> 16))
6811                     {
6812                         /* This source row is the same as last source row -
6813                          * Copy the already stretched row. */
6814                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6815                     }
6816                     else
6817                     {
6818 #define STRETCH_ROW(type) \
6819 do { \
6820     const type *s = (const type *)sbuf; \
6821     type *d = (type *)dbuf; \
6822     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6823         d[x] = s[sx >> 16]; \
6824 } while(0)
6825
6826                         switch(bpp)
6827                         {
6828                             case 1:
6829                                 STRETCH_ROW(BYTE);
6830                                 break;
6831                             case 2:
6832                                 STRETCH_ROW(WORD);
6833                                 break;
6834                             case 4:
6835                                 STRETCH_ROW(DWORD);
6836                                 break;
6837                             case 3:
6838                             {
6839                                 const BYTE *s;
6840                                 BYTE *d = dbuf;
6841                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6842                                 {
6843                                     DWORD pixel;
6844
6845                                     s = sbuf + 3 * (sx >> 16);
6846                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6847                                     d[0] = (pixel      ) & 0xff;
6848                                     d[1] = (pixel >>  8) & 0xff;
6849                                     d[2] = (pixel >> 16) & 0xff;
6850                                     d += 3;
6851                                 }
6852                                 break;
6853                             }
6854                             default:
6855                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6856                                 hr = WINED3DERR_NOTAVAILABLE;
6857                                 goto error;
6858                         }
6859 #undef STRETCH_ROW
6860                     }
6861                     dbuf += dst_map.row_pitch;
6862                     last_sy = sy;
6863                 }
6864             }
6865         }
6866         else
6867         {
6868             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6869             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6870             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6871             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6872             {
6873                 /* The color keying flags are checked for correctness in ddraw */
6874                 if (flags & WINEDDBLT_KEYSRC)
6875                 {
6876                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6877                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6878                 }
6879                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6880                 {
6881                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6882                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6883                 }
6884
6885                 if (flags & WINEDDBLT_KEYDEST)
6886                 {
6887                     /* Destination color keys are taken from the source surface! */
6888                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6889                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6890                 }
6891                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6892                 {
6893                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6894                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6895                 }
6896
6897                 if (bpp == 1)
6898                 {
6899                     keymask = 0xff;
6900                 }
6901                 else
6902                 {
6903                     keymask = src_format->red_mask
6904                             | src_format->green_mask
6905                             | src_format->blue_mask;
6906                 }
6907                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6908             }
6909
6910             if (flags & WINEDDBLT_DDFX)
6911             {
6912                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6913                 LONG tmpxy;
6914                 dTopLeft     = dbuf;
6915                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6916                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6917                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6918
6919                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6920                 {
6921                     /* I don't think we need to do anything about this flag */
6922                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6923                 }
6924                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6925                 {
6926                     tmp          = dTopRight;
6927                     dTopRight    = dTopLeft;
6928                     dTopLeft     = tmp;
6929                     tmp          = dBottomRight;
6930                     dBottomRight = dBottomLeft;
6931                     dBottomLeft  = tmp;
6932                     dstxinc = dstxinc * -1;
6933                 }
6934                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6935                 {
6936                     tmp          = dTopLeft;
6937                     dTopLeft     = dBottomLeft;
6938                     dBottomLeft  = tmp;
6939                     tmp          = dTopRight;
6940                     dTopRight    = dBottomRight;
6941                     dBottomRight = tmp;
6942                     dstyinc = dstyinc * -1;
6943                 }
6944                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6945                 {
6946                     /* I don't think we need to do anything about this flag */
6947                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6948                 }
6949                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6950                 {
6951                     tmp          = dBottomRight;
6952                     dBottomRight = dTopLeft;
6953                     dTopLeft     = tmp;
6954                     tmp          = dBottomLeft;
6955                     dBottomLeft  = dTopRight;
6956                     dTopRight    = tmp;
6957                     dstxinc = dstxinc * -1;
6958                     dstyinc = dstyinc * -1;
6959                 }
6960                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6961                 {
6962                     tmp          = dTopLeft;
6963                     dTopLeft     = dBottomLeft;
6964                     dBottomLeft  = dBottomRight;
6965                     dBottomRight = dTopRight;
6966                     dTopRight    = tmp;
6967                     tmpxy   = dstxinc;
6968                     dstxinc = dstyinc;
6969                     dstyinc = tmpxy;
6970                     dstxinc = dstxinc * -1;
6971                 }
6972                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6973                 {
6974                     tmp          = dTopLeft;
6975                     dTopLeft     = dTopRight;
6976                     dTopRight    = dBottomRight;
6977                     dBottomRight = dBottomLeft;
6978                     dBottomLeft  = tmp;
6979                     tmpxy   = dstxinc;
6980                     dstxinc = dstyinc;
6981                     dstyinc = tmpxy;
6982                     dstyinc = dstyinc * -1;
6983                 }
6984                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6985                 {
6986                     /* I don't think we need to do anything about this flag */
6987                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6988                 }
6989                 dbuf = dTopLeft;
6990                 flags &= ~(WINEDDBLT_DDFX);
6991             }
6992
6993 #define COPY_COLORKEY_FX(type) \
6994 do { \
6995     const type *s; \
6996     type *d = (type *)dbuf, *dx, tmp; \
6997     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6998     { \
6999         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7000         dx = d; \
7001         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7002         { \
7003             tmp = s[sx >> 16]; \
7004             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7005                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7006             { \
7007                 dx[0] = tmp; \
7008             } \
7009             dx = (type *)(((BYTE *)dx) + dstxinc); \
7010         } \
7011         d = (type *)(((BYTE *)d) + dstyinc); \
7012     } \
7013 } while(0)
7014
7015             switch (bpp)
7016             {
7017                 case 1:
7018                     COPY_COLORKEY_FX(BYTE);
7019                     break;
7020                 case 2:
7021                     COPY_COLORKEY_FX(WORD);
7022                     break;
7023                 case 4:
7024                     COPY_COLORKEY_FX(DWORD);
7025                     break;
7026                 case 3:
7027                 {
7028                     const BYTE *s;
7029                     BYTE *d = dbuf, *dx;
7030                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7031                     {
7032                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7033                         dx = d;
7034                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7035                         {
7036                             DWORD pixel, dpixel = 0;
7037                             s = sbuf + 3 * (sx>>16);
7038                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7039                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7040                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7041                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7042                             {
7043                                 dx[0] = (pixel      ) & 0xff;
7044                                 dx[1] = (pixel >>  8) & 0xff;
7045                                 dx[2] = (pixel >> 16) & 0xff;
7046                             }
7047                             dx += dstxinc;
7048                         }
7049                         d += dstyinc;
7050                     }
7051                     break;
7052                 }
7053                 default:
7054                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7055                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7056                     hr = WINED3DERR_NOTAVAILABLE;
7057                     goto error;
7058 #undef COPY_COLORKEY_FX
7059             }
7060         }
7061     }
7062
7063 error:
7064     if (flags && FIXME_ON(d3d_surface))
7065     {
7066         FIXME("\tUnsupported flags: %#x.\n", flags);
7067     }
7068
7069 release:
7070     wined3d_surface_unmap(dst_surface);
7071     if (src_surface && src_surface != dst_surface)
7072         wined3d_surface_unmap(src_surface);
7073     /* Release the converted surface, if any. */
7074     if (src_surface && src_surface != orig_src)
7075         wined3d_surface_decref(src_surface);
7076
7077     return hr;
7078 }
7079
7080 /* Do not call while under the GL lock. */
7081 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7082         const RECT *dst_rect, const struct wined3d_color *color)
7083 {
7084     static const RECT src_rect;
7085     WINEDDBLTFX BltFx;
7086
7087     memset(&BltFx, 0, sizeof(BltFx));
7088     BltFx.dwSize = sizeof(BltFx);
7089     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7090     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7091             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7092 }
7093
7094 /* Do not call while under the GL lock. */
7095 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7096         struct wined3d_surface *surface, const RECT *rect, float depth)
7097 {
7098     FIXME("Depth filling not implemented by cpu_blit.\n");
7099     return WINED3DERR_INVALIDCALL;
7100 }
7101
7102 const struct blit_shader cpu_blit =  {
7103     cpu_blit_alloc,
7104     cpu_blit_free,
7105     cpu_blit_set,
7106     cpu_blit_unset,
7107     cpu_blit_supported,
7108     cpu_blit_color_fill,
7109     cpu_blit_depth_fill,
7110 };
7111
7112 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7113         UINT width, UINT height, UINT level, enum wined3d_multisample_type multisample_type,
7114         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7115         WINED3DPOOL pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7116 {
7117     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7118     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7119     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7120     unsigned int resource_size;
7121     HRESULT hr;
7122
7123     if (multisample_quality > 0)
7124     {
7125         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7126         multisample_quality = 0;
7127     }
7128
7129     /* Quick lockable sanity check.
7130      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7131      * this function is too deep to need to care about things like this.
7132      * Levels need to be checked too, since they all affect what can be done. */
7133     switch (pool)
7134     {
7135         case WINED3DPOOL_SCRATCH:
7136             if (!lockable)
7137             {
7138                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7139                         "which are mutually exclusive, setting lockable to TRUE.\n");
7140                 lockable = TRUE;
7141             }
7142             break;
7143
7144         case WINED3DPOOL_SYSTEMMEM:
7145             if (!lockable)
7146                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7147             break;
7148
7149         case WINED3DPOOL_MANAGED:
7150             if (usage & WINED3DUSAGE_DYNAMIC)
7151                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7152             break;
7153
7154         case WINED3DPOOL_DEFAULT:
7155             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7156                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7157             break;
7158
7159         default:
7160             FIXME("Unknown pool %#x.\n", pool);
7161             break;
7162     };
7163
7164     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7165         FIXME("Trying to create a render target that isn't in the default pool.\n");
7166
7167     /* FIXME: Check that the format is supported by the device. */
7168
7169     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7170     if (!resource_size)
7171         return WINED3DERR_INVALIDCALL;
7172
7173     surface->surface_type = surface_type;
7174
7175     switch (surface_type)
7176     {
7177         case SURFACE_OPENGL:
7178             surface->surface_ops = &surface_ops;
7179             break;
7180
7181         case SURFACE_GDI:
7182             surface->surface_ops = &gdi_surface_ops;
7183             break;
7184
7185         default:
7186             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7187             return WINED3DERR_INVALIDCALL;
7188     }
7189
7190     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7191             multisample_type, multisample_quality, usage, pool, width, height, 1,
7192             resource_size, parent, parent_ops, &surface_resource_ops);
7193     if (FAILED(hr))
7194     {
7195         WARN("Failed to initialize resource, returning %#x.\n", hr);
7196         return hr;
7197     }
7198
7199     /* "Standalone" surface. */
7200     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7201
7202     surface->texture_level = level;
7203     list_init(&surface->overlays);
7204
7205     /* Flags */
7206     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7207     if (flags & WINED3D_SURFACE_DISCARD)
7208         surface->flags |= SFLAG_DISCARD;
7209     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7210         surface->flags |= SFLAG_PIN_SYSMEM;
7211     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7212         surface->flags |= SFLAG_LOCKABLE;
7213     /* I'm not sure if this qualifies as a hack or as an optimization. It
7214      * seems reasonable to assume that lockable render targets will get
7215      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7216      * creation. However, the other reason we want to do this is that several
7217      * ddraw applications access surface memory while the surface isn't
7218      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7219      * future locks prevents these from crashing. */
7220     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7221         surface->flags |= SFLAG_DYNLOCK;
7222
7223     /* Mark the texture as dirty so that it gets loaded first time around. */
7224     surface_add_dirty_rect(surface, NULL);
7225     list_init(&surface->renderbuffers);
7226
7227     TRACE("surface %p, memory %p, size %u\n",
7228             surface, surface->resource.allocatedMemory, surface->resource.size);
7229
7230     /* Call the private setup routine */
7231     hr = surface->surface_ops->surface_private_setup(surface);
7232     if (FAILED(hr))
7233     {
7234         ERR("Private setup failed, returning %#x\n", hr);
7235         surface_cleanup(surface);
7236         return hr;
7237     }
7238
7239     /* Similar to lockable rendertargets above, creating the DIB section
7240      * during surface initialization prevents the sysmem pointer from changing
7241      * after a wined3d_surface_getdc() call. */
7242     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7243             && SUCCEEDED(surface_create_dib_section(surface)))
7244     {
7245         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7246         surface->resource.heapMemory = NULL;
7247         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7248     }
7249
7250     return hr;
7251 }
7252
7253 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7254         enum wined3d_format_id format_id, UINT level, DWORD usage, WINED3DPOOL pool,
7255         enum wined3d_multisample_type multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7256         DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7257 {
7258     struct wined3d_surface *object;
7259     HRESULT hr;
7260
7261     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7262             device, width, height, debug_d3dformat(format_id), level);
7263     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7264             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7265     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7266
7267     if (surface_type == SURFACE_OPENGL && !device->adapter)
7268     {
7269         ERR("OpenGL surfaces are not available without OpenGL.\n");
7270         return WINED3DERR_NOTAVAILABLE;
7271     }
7272
7273     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7274     if (!object)
7275     {
7276         ERR("Failed to allocate surface memory.\n");
7277         return WINED3DERR_OUTOFVIDEOMEMORY;
7278     }
7279
7280     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7281             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7282     if (FAILED(hr))
7283     {
7284         WARN("Failed to initialize surface, returning %#x.\n", hr);
7285         HeapFree(GetProcessHeap(), 0, object);
7286         return hr;
7287     }
7288
7289     TRACE("Created surface %p.\n", object);
7290     *surface = object;
7291
7292     return hr;
7293 }