winealsa: Unify the checks for wBitsPerSample.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
164 struct blt_info
165 {
166     GLenum binding;
167     GLenum bind_target;
168     enum tex_types tex_type;
169     GLfloat coords[4][3];
170 };
171
/* Floating point rectangle: left, top, right and bottom edges, in that order. */
struct float_rect
{
    float l, t, r, b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, Filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380     UINT usage;
381     HDC dc;
382
383     TRACE("surface %p.\n", surface);
384
385     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
386     {
387         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
388         return WINED3DERR_INVALIDCALL;
389     }
390
391     switch (format->byte_count)
392     {
393         case 2:
394         case 4:
395             /* Allocate extra space to store the RGB bit masks. */
396             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
397             break;
398
399         case 3:
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
401             break;
402
403         default:
404             /* Allocate extra space for a palette. */
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
406                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
407             break;
408     }
409
410     if (!b_info)
411         return E_OUTOFMEMORY;
412
413     /* Some applications access the surface in via DWORDs, and do not take
414      * the necessary care at the end of the surface. So we need at least
415      * 4 extra bytes at the end of the surface. Check against the page size,
416      * if the last page used for the surface has at least 4 spare bytes we're
417      * safe, otherwise add an extra line to the DIB section. */
418     GetSystemInfo(&sysInfo);
419     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
420     {
421         extraline = 1;
422         TRACE("Adding an extra line to the DIB section.\n");
423     }
424
425     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
426     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
427     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
428     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
429     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
430             * wined3d_surface_get_pitch(surface);
431     b_info->bmiHeader.biPlanes = 1;
432     b_info->bmiHeader.biBitCount = format->byte_count * 8;
433
434     b_info->bmiHeader.biXPelsPerMeter = 0;
435     b_info->bmiHeader.biYPelsPerMeter = 0;
436     b_info->bmiHeader.biClrUsed = 0;
437     b_info->bmiHeader.biClrImportant = 0;
438
439     /* Get the bit masks */
440     masks = (DWORD *)b_info->bmiColors;
441     switch (surface->resource.format->id)
442     {
443         case WINED3DFMT_B8G8R8_UNORM:
444             usage = DIB_RGB_COLORS;
445             b_info->bmiHeader.biCompression = BI_RGB;
446             break;
447
448         case WINED3DFMT_B5G5R5X1_UNORM:
449         case WINED3DFMT_B5G5R5A1_UNORM:
450         case WINED3DFMT_B4G4R4A4_UNORM:
451         case WINED3DFMT_B4G4R4X4_UNORM:
452         case WINED3DFMT_B2G3R3_UNORM:
453         case WINED3DFMT_B2G3R3A8_UNORM:
454         case WINED3DFMT_R10G10B10A2_UNORM:
455         case WINED3DFMT_R8G8B8A8_UNORM:
456         case WINED3DFMT_R8G8B8X8_UNORM:
457         case WINED3DFMT_B10G10R10A2_UNORM:
458         case WINED3DFMT_B5G6R5_UNORM:
459         case WINED3DFMT_R16G16B16A16_UNORM:
460             usage = 0;
461             b_info->bmiHeader.biCompression = BI_BITFIELDS;
462             masks[0] = format->red_mask;
463             masks[1] = format->green_mask;
464             masks[2] = format->blue_mask;
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             usage = 0;
471             break;
472     }
473
474     if (!(dc = GetDC(0)))
475     {
476         HeapFree(GetProcessHeap(), 0, b_info);
477         return HRESULT_FROM_WIN32(GetLastError());
478     }
479
480     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
481             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
482             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
483     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
484     ReleaseDC(0, dc);
485
486     if (!surface->dib.DIBsection)
487     {
488         ERR("Failed to create DIB section.\n");
489         HeapFree(GetProcessHeap(), 0, b_info);
490         return HRESULT_FROM_WIN32(GetLastError());
491     }
492
493     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
494     /* Copy the existing surface to the dib section. */
495     if (surface->resource.allocatedMemory)
496     {
497         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
498                 surface->resource.height * wined3d_surface_get_pitch(surface));
499     }
500     else
501     {
502         /* This is to make maps read the GL texture although memory is allocated. */
503         surface->flags &= ~SFLAG_INSYSMEM;
504     }
505     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
506
507     HeapFree(GetProcessHeap(), 0, b_info);
508
509     /* Now allocate a DC. */
510     surface->hDC = CreateCompatibleDC(0);
511     SelectObject(surface->hDC, surface->dib.DIBsection);
512     TRACE("Using wined3d palette %p.\n", surface->palette);
513     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
514
515     surface->flags |= SFLAG_DIBSECTION;
516
517     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
518     surface->resource.heapMemory = NULL;
519
520     return WINED3D_OK;
521 }
522
523 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
524 {
525     if (surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
526         return FALSE;
527     if (!(surface->flags & SFLAG_DYNLOCK))
528         return FALSE;
529     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
530         return FALSE;
531     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
532         return FALSE;
533
534     return TRUE;
535 }
536
537 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
538 {
539     struct wined3d_context *context;
540     GLenum error;
541
542     context = context_acquire(surface->resource.device, NULL);
543     ENTER_GL();
544
545     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
546     error = glGetError();
547     if (!surface->pbo || error != GL_NO_ERROR)
548         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
549
550     TRACE("Binding PBO %u.\n", surface->pbo);
551
552     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
553     checkGLcall("glBindBufferARB");
554
555     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
556             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
557     checkGLcall("glBufferDataARB");
558
559     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
560     checkGLcall("glBindBufferARB");
561
562     /* We don't need the system memory anymore and we can't even use it for PBOs. */
563     if (!(surface->flags & SFLAG_CLIENT))
564     {
565         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
566         surface->resource.heapMemory = NULL;
567     }
568     surface->resource.allocatedMemory = NULL;
569     surface->flags |= SFLAG_PBO;
570     LEAVE_GL();
571     context_release(context);
572 }
573
574 static void surface_prepare_system_memory(struct wined3d_surface *surface)
575 {
576     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
577
578     TRACE("surface %p.\n", surface);
579
580     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
581         surface_load_pbo(surface, gl_info);
582     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
583     {
584         /* Whatever surface we have, make sure that there is memory allocated
585          * for the downloaded copy, or a PBO to map. */
586         if (!surface->resource.heapMemory)
587             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
588
589         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
590                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
591
592         if (surface->flags & SFLAG_INSYSMEM)
593             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
594     }
595 }
596
597 static void surface_evict_sysmem(struct wined3d_surface *surface)
598 {
599     if (surface->flags & SFLAG_DONOTFREE)
600         return;
601
602     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
603     surface->resource.allocatedMemory = NULL;
604     surface->resource.heapMemory = NULL;
605     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
606 }
607
608 /* Context activation is done by the caller. */
609 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
610         struct wined3d_context *context, BOOL srgb)
611 {
612     struct wined3d_device *device = surface->resource.device;
613     DWORD active_sampler;
614
615     /* We don't need a specific texture unit, but after binding the texture
616      * the current unit is dirty. Read the unit back instead of switching to
617      * 0, this avoids messing around with the state manager's GL states. The
618      * current texture unit should always be a valid one.
619      *
620      * To be more specific, this is tricky because we can implicitly be
621      * called from sampler() in state.c. This means we can't touch anything
622      * other than whatever happens to be the currently active texture, or we
623      * would risk marking already applied sampler states dirty again. */
624     active_sampler = device->rev_tex_unit_map[context->active_texture];
625
626     if (active_sampler != WINED3D_UNMAPPED_STAGE)
627         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
628     surface_bind(surface, context, srgb);
629 }
630
631 static void surface_force_reload(struct wined3d_surface *surface)
632 {
633     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
634 }
635
636 static void surface_release_client_storage(struct wined3d_surface *surface)
637 {
638     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
639
640     ENTER_GL();
641     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
642     if (surface->texture_name)
643     {
644         surface_bind_and_dirtify(surface, context, FALSE);
645         glTexImage2D(surface->texture_target, surface->texture_level,
646                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
647     }
648     if (surface->texture_name_srgb)
649     {
650         surface_bind_and_dirtify(surface, context, TRUE);
651         glTexImage2D(surface->texture_target, surface->texture_level,
652                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
653     }
654     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
655     LEAVE_GL();
656
657     context_release(context);
658
659     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
660     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
661     surface_force_reload(surface);
662 }
663
664 static HRESULT surface_private_setup(struct wined3d_surface *surface)
665 {
666     /* TODO: Check against the maximum texture sizes supported by the video card. */
667     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
668     unsigned int pow2Width, pow2Height;
669
670     TRACE("surface %p.\n", surface);
671
672     surface->texture_name = 0;
673     surface->texture_target = GL_TEXTURE_2D;
674
675     /* Non-power2 support */
676     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
677     {
678         pow2Width = surface->resource.width;
679         pow2Height = surface->resource.height;
680     }
681     else
682     {
683         /* Find the nearest pow2 match */
684         pow2Width = pow2Height = 1;
685         while (pow2Width < surface->resource.width)
686             pow2Width <<= 1;
687         while (pow2Height < surface->resource.height)
688             pow2Height <<= 1;
689     }
690     surface->pow2Width = pow2Width;
691     surface->pow2Height = pow2Height;
692
693     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
694     {
695         /* TODO: Add support for non power two compressed textures. */
696         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
697         {
698             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
699                   surface, surface->resource.width, surface->resource.height);
700             return WINED3DERR_NOTAVAILABLE;
701         }
702     }
703
704     if (pow2Width != surface->resource.width
705             || pow2Height != surface->resource.height)
706     {
707         surface->flags |= SFLAG_NONPOW2;
708     }
709
710     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
711             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
712     {
713         /* One of three options:
714          * 1: Do the same as we do with NPOT and scale the texture, (any
715          *    texture ops would require the texture to be scaled which is
716          *    potentially slow)
717          * 2: Set the texture to the maximum size (bad idea).
718          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
719          * 4: Create the surface, but allow it to be used only for DirectDraw
720          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
721          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
722          *    the render target. */
723         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
724         {
725             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
726             return WINED3DERR_NOTAVAILABLE;
727         }
728
729         /* We should never use this surface in combination with OpenGL! */
730         TRACE("Creating an oversized surface: %ux%u.\n",
731                 surface->pow2Width, surface->pow2Height);
732     }
733     else
734     {
735         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
736          * and EXT_PALETTED_TEXTURE is used in combination with texture
737          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
738          * EXT_PALETTED_TEXTURE doesn't work in combination with
739          * ARB_TEXTURE_RECTANGLE. */
740         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
741                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
742                 && gl_info->supported[EXT_PALETTED_TEXTURE]
743                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
744         {
745             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
746             surface->pow2Width = surface->resource.width;
747             surface->pow2Height = surface->resource.height;
748             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
749         }
750     }
751
752     switch (wined3d_settings.offscreen_rendering_mode)
753     {
754         case ORM_FBO:
755             surface->get_drawable_size = get_drawable_size_fbo;
756             break;
757
758         case ORM_BACKBUFFER:
759             surface->get_drawable_size = get_drawable_size_backbuffer;
760             break;
761
762         default:
763             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
764             return WINED3DERR_INVALIDCALL;
765     }
766
767     surface->flags |= SFLAG_INSYSMEM;
768
769     return WINED3D_OK;
770 }
771
772 static void surface_realize_palette(struct wined3d_surface *surface)
773 {
774     struct wined3d_palette *palette = surface->palette;
775
776     TRACE("surface %p.\n", surface);
777
778     if (!palette) return;
779
780     if (surface->resource.format->id == WINED3DFMT_P8_UINT
781             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
782     {
783         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
784         {
785             /* Make sure the texture is up to date. This call doesn't do
786              * anything if the texture is already up to date. */
787             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
788
789             /* We want to force a palette refresh, so mark the drawable as not being up to date */
790             if (!surface_is_offscreen(surface))
791                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
792         }
793         else
794         {
795             if (!(surface->flags & SFLAG_INSYSMEM))
796             {
797                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
798                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
799             }
800             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
801         }
802     }
803
804     if (surface->flags & SFLAG_DIBSECTION)
805     {
806         RGBQUAD col[256];
807         unsigned int i;
808
809         TRACE("Updating the DC's palette.\n");
810
811         for (i = 0; i < 256; ++i)
812         {
813             col[i].rgbRed   = palette->palents[i].peRed;
814             col[i].rgbGreen = palette->palents[i].peGreen;
815             col[i].rgbBlue  = palette->palents[i].peBlue;
816             col[i].rgbReserved = 0;
817         }
818         SetDIBColorTable(surface->hDC, 0, 256, col);
819     }
820
821     /* Propagate the changes to the drawable when we have a palette. */
822     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
823         surface_load_location(surface, surface->draw_binding, NULL);
824 }
825
826 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
827 {
828     HRESULT hr;
829
830     /* If there's no destination surface there is nothing to do. */
831     if (!surface->overlay_dest)
832         return WINED3D_OK;
833
834     /* Blt calls ModifyLocation on the dest surface, which in turn calls
835      * DrawOverlay to update the overlay. Prevent an endless recursion. */
836     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
837         return WINED3D_OK;
838
839     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
840     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
841             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
842     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
843
844     return hr;
845 }
846
847 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
848 {
849     struct wined3d_device *device = surface->resource.device;
850     const RECT *pass_rect = rect;
851
852     TRACE("surface %p, rect %s, flags %#x.\n",
853             surface, wine_dbgstr_rect(rect), flags);
854
855     if (flags & WINED3DLOCK_DISCARD)
856     {
857         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
858         surface_prepare_system_memory(surface);
859         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
860     }
861     else
862     {
863         /* surface_load_location() does not check if the rectangle specifies
864          * the full surface. Most callers don't need that, so do it here. */
865         if (rect && !rect->top && !rect->left
866                 && rect->right == surface->resource.width
867                 && rect->bottom == surface->resource.height)
868             pass_rect = NULL;
869         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
870     }
871
872     if (surface->flags & SFLAG_PBO)
873     {
874         const struct wined3d_gl_info *gl_info;
875         struct wined3d_context *context;
876
877         context = context_acquire(device, NULL);
878         gl_info = context->gl_info;
879
880         ENTER_GL();
881         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
882         checkGLcall("glBindBufferARB");
883
884         /* This shouldn't happen but could occur if some other function
885          * didn't handle the PBO properly. */
886         if (surface->resource.allocatedMemory)
887             ERR("The surface already has PBO memory allocated.\n");
888
889         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
890         checkGLcall("glMapBufferARB");
891
892         /* Make sure the PBO isn't set anymore in order not to break non-PBO
893          * calls. */
894         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
895         checkGLcall("glBindBufferARB");
896
897         LEAVE_GL();
898         context_release(context);
899     }
900
901     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
902     {
903         if (!rect)
904             surface_add_dirty_rect(surface, NULL);
905         else
906         {
907             struct wined3d_box b;
908
909             b.left = rect->left;
910             b.top = rect->top;
911             b.right = rect->right;
912             b.bottom = rect->bottom;
913             b.front = 0;
914             b.back = 1;
915             surface_add_dirty_rect(surface, &b);
916         }
917     }
918 }
919
920 static void surface_unmap(struct wined3d_surface *surface)
921 {
922     struct wined3d_device *device = surface->resource.device;
923     BOOL fullsurface;
924
925     TRACE("surface %p.\n", surface);
926
927     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
928
929     if (surface->flags & SFLAG_PBO)
930     {
931         const struct wined3d_gl_info *gl_info;
932         struct wined3d_context *context;
933
934         TRACE("Freeing PBO memory.\n");
935
936         context = context_acquire(device, NULL);
937         gl_info = context->gl_info;
938
939         ENTER_GL();
940         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
941         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
942         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
943         checkGLcall("glUnmapBufferARB");
944         LEAVE_GL();
945         context_release(context);
946
947         surface->resource.allocatedMemory = NULL;
948     }
949
950     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
951
952     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
953     {
954         TRACE("Not dirtified, nothing to do.\n");
955         goto done;
956     }
957
958     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
959             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
960     {
961         if (!surface->dirtyRect.left && !surface->dirtyRect.top
962                 && surface->dirtyRect.right == surface->resource.width
963                 && surface->dirtyRect.bottom == surface->resource.height)
964         {
965             fullsurface = TRUE;
966         }
967         else
968         {
969             /* TODO: Proper partial rectangle tracking. */
970             fullsurface = FALSE;
971             surface->flags |= SFLAG_INSYSMEM;
972         }
973
974         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
975
976         /* Partial rectangle tracking is not commonly implemented, it is only
977          * done for render targets. INSYSMEM was set before to tell
978          * surface_load_location() where to read the rectangle from.
979          * Indrawable is set because all modifications from the partial
980          * sysmem copy are written back to the drawable, thus the surface is
981          * merged again in the drawable. The sysmem copy is not fully up to
982          * date because only a subrectangle was read in Map(). */
983         if (!fullsurface)
984         {
985             surface_modify_location(surface, surface->draw_binding, TRUE);
986             surface_evict_sysmem(surface);
987         }
988
989         surface->dirtyRect.left = surface->resource.width;
990         surface->dirtyRect.top = surface->resource.height;
991         surface->dirtyRect.right = 0;
992         surface->dirtyRect.bottom = 0;
993     }
994     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
995     {
996         FIXME("Depth / stencil buffer locking is not implemented.\n");
997     }
998
999 done:
1000     /* Overlays have to be redrawn manually after changes with the GL implementation */
1001     if (surface->overlay_dest)
1002         surface_draw_overlay(surface);
1003 }
1004
1005 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1006 {
1007     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1008         return FALSE;
1009     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1010         return FALSE;
1011     return TRUE;
1012 }
1013
1014 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1015         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1016 {
1017     const struct wined3d_gl_info *gl_info;
1018     struct wined3d_context *context;
1019     DWORD src_mask, dst_mask;
1020     GLbitfield gl_mask;
1021
1022     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1023             device, src_surface, wine_dbgstr_rect(src_rect),
1024             dst_surface, wine_dbgstr_rect(dst_rect));
1025
1026     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1027     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1028
1029     if (src_mask != dst_mask)
1030     {
1031         ERR("Incompatible formats %s and %s.\n",
1032                 debug_d3dformat(src_surface->resource.format->id),
1033                 debug_d3dformat(dst_surface->resource.format->id));
1034         return;
1035     }
1036
1037     if (!src_mask)
1038     {
1039         ERR("Not a depth / stencil format: %s.\n",
1040                 debug_d3dformat(src_surface->resource.format->id));
1041         return;
1042     }
1043
1044     gl_mask = 0;
1045     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1046         gl_mask |= GL_DEPTH_BUFFER_BIT;
1047     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1048         gl_mask |= GL_STENCIL_BUFFER_BIT;
1049
1050     /* Make sure the locations are up-to-date. Loading the destination
1051      * surface isn't required if the entire surface is overwritten. */
1052     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1053     if (!surface_is_full_rect(dst_surface, dst_rect))
1054         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1055
1056     context = context_acquire(device, NULL);
1057     if (!context->valid)
1058     {
1059         context_release(context);
1060         WARN("Invalid context, skipping blit.\n");
1061         return;
1062     }
1063
1064     gl_info = context->gl_info;
1065
1066     ENTER_GL();
1067
1068     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1069     glReadBuffer(GL_NONE);
1070     checkGLcall("glReadBuffer()");
1071     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1072
1073     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1074     context_set_draw_buffer(context, GL_NONE);
1075     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1076
1077     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1078     {
1079         glDepthMask(GL_TRUE);
1080         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1081     }
1082     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1083     {
1084         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1085         {
1086             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1087             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1088         }
1089         glStencilMask(~0U);
1090         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1091     }
1092
1093     glDisable(GL_SCISSOR_TEST);
1094     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1095
1096     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1097             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1098     checkGLcall("glBlitFramebuffer()");
1099
1100     LEAVE_GL();
1101
1102     if (wined3d_settings.strict_draw_ordering)
1103         wglFlush(); /* Flush to ensure ordering across contexts. */
1104
1105     context_release(context);
1106 }
1107
1108 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1109  * Depth / stencil is not supported. */
1110 static void surface_blt_fbo(const struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1111         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1112         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1113 {
1114     const struct wined3d_gl_info *gl_info;
1115     struct wined3d_context *context;
1116     RECT src_rect, dst_rect;
1117     GLenum gl_filter;
1118     GLenum buffer;
1119
1120     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1121     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1122             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1123     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1124             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1125
1126     src_rect = *src_rect_in;
1127     dst_rect = *dst_rect_in;
1128
1129     switch (filter)
1130     {
1131         case WINED3DTEXF_LINEAR:
1132             gl_filter = GL_LINEAR;
1133             break;
1134
1135         default:
1136             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1137         case WINED3DTEXF_NONE:
1138         case WINED3DTEXF_POINT:
1139             gl_filter = GL_NEAREST;
1140             break;
1141     }
1142
1143     /* Resolve the source surface first if needed. */
1144     if (src_location == SFLAG_INRB_MULTISAMPLE
1145             && (src_surface->resource.format->id != dst_surface->resource.format->id
1146                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1147                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1148         src_location = SFLAG_INRB_RESOLVED;
1149
1150     /* Make sure the locations are up-to-date. Loading the destination
1151      * surface isn't required if the entire surface is overwritten. (And is
1152      * in fact harmful if we're being called by surface_load_location() with
1153      * the purpose of loading the destination surface.) */
1154     surface_load_location(src_surface, src_location, NULL);
1155     if (!surface_is_full_rect(dst_surface, &dst_rect))
1156         surface_load_location(dst_surface, dst_location, NULL);
1157
1158     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1159     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1160     else context = context_acquire(device, NULL);
1161
1162     if (!context->valid)
1163     {
1164         context_release(context);
1165         WARN("Invalid context, skipping blit.\n");
1166         return;
1167     }
1168
1169     gl_info = context->gl_info;
1170
1171     if (src_location == SFLAG_INDRAWABLE)
1172     {
1173         TRACE("Source surface %p is onscreen.\n", src_surface);
1174         buffer = surface_get_gl_buffer(src_surface);
1175         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1176     }
1177     else
1178     {
1179         TRACE("Source surface %p is offscreen.\n", src_surface);
1180         buffer = GL_COLOR_ATTACHMENT0;
1181     }
1182
1183     ENTER_GL();
1184     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1185     glReadBuffer(buffer);
1186     checkGLcall("glReadBuffer()");
1187     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1188     LEAVE_GL();
1189
1190     if (dst_location == SFLAG_INDRAWABLE)
1191     {
1192         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1193         buffer = surface_get_gl_buffer(dst_surface);
1194         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1195     }
1196     else
1197     {
1198         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1199         buffer = GL_COLOR_ATTACHMENT0;
1200     }
1201
1202     ENTER_GL();
1203     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1204     context_set_draw_buffer(context, buffer);
1205     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1206     context_invalidate_state(context, STATE_FRAMEBUFFER);
1207
1208     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1209     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1210     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1211     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1212     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1213
1214     glDisable(GL_SCISSOR_TEST);
1215     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1216
1217     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1218             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1219     checkGLcall("glBlitFramebuffer()");
1220
1221     LEAVE_GL();
1222
1223     if (wined3d_settings.strict_draw_ordering
1224             || (dst_location == SFLAG_INDRAWABLE
1225             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1226         wglFlush();
1227
1228     context_release(context);
1229 }
1230
1231 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1232         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1233         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1234 {
1235     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1236         return FALSE;
1237
1238     /* Source and/or destination need to be on the GL side */
1239     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1240         return FALSE;
1241
1242     switch (blit_op)
1243     {
1244         case WINED3D_BLIT_OP_COLOR_BLIT:
1245             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1246                 return FALSE;
1247             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1248                 return FALSE;
1249             break;
1250
1251         case WINED3D_BLIT_OP_DEPTH_BLIT:
1252             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1253                 return FALSE;
1254             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1255                 return FALSE;
1256             break;
1257
1258         default:
1259             return FALSE;
1260     }
1261
1262     if (!(src_format->id == dst_format->id
1263             || (is_identity_fixup(src_format->color_fixup)
1264             && is_identity_fixup(dst_format->color_fixup))))
1265         return FALSE;
1266
1267     return TRUE;
1268 }
1269
1270 /* This function checks if the primary render target uses the 8bit paletted format. */
1271 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1272 {
1273     if (device->fb.render_targets && device->fb.render_targets[0])
1274     {
1275         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1276         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1277                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1278             return TRUE;
1279     }
1280     return FALSE;
1281 }
1282
1283 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1284         DWORD color, struct wined3d_color *float_color)
1285 {
1286     const struct wined3d_format *format = surface->resource.format;
1287     const struct wined3d_device *device = surface->resource.device;
1288
1289     switch (format->id)
1290     {
1291         case WINED3DFMT_P8_UINT:
1292             if (surface->palette)
1293             {
1294                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1295                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1296                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1297             }
1298             else
1299             {
1300                 float_color->r = 0.0f;
1301                 float_color->g = 0.0f;
1302                 float_color->b = 0.0f;
1303             }
1304             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1305             break;
1306
1307         case WINED3DFMT_B5G6R5_UNORM:
1308             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1309             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1310             float_color->b = (color & 0x1f) / 31.0f;
1311             float_color->a = 1.0f;
1312             break;
1313
1314         case WINED3DFMT_B8G8R8_UNORM:
1315         case WINED3DFMT_B8G8R8X8_UNORM:
1316             float_color->r = D3DCOLOR_R(color);
1317             float_color->g = D3DCOLOR_G(color);
1318             float_color->b = D3DCOLOR_B(color);
1319             float_color->a = 1.0f;
1320             break;
1321
1322         case WINED3DFMT_B8G8R8A8_UNORM:
1323             float_color->r = D3DCOLOR_R(color);
1324             float_color->g = D3DCOLOR_G(color);
1325             float_color->b = D3DCOLOR_B(color);
1326             float_color->a = D3DCOLOR_A(color);
1327             break;
1328
1329         default:
1330             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1331             return FALSE;
1332     }
1333
1334     return TRUE;
1335 }
1336
1337 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1338 {
1339     const struct wined3d_format *format = surface->resource.format;
1340
1341     switch (format->id)
1342     {
1343         case WINED3DFMT_S1_UINT_D15_UNORM:
1344             *float_depth = depth / (float)0x00007fff;
1345             break;
1346
1347         case WINED3DFMT_D16_UNORM:
1348             *float_depth = depth / (float)0x0000ffff;
1349             break;
1350
1351         case WINED3DFMT_D24_UNORM_S8_UINT:
1352         case WINED3DFMT_X8D24_UNORM:
1353             *float_depth = depth / (float)0x00ffffff;
1354             break;
1355
1356         case WINED3DFMT_D32_UNORM:
1357             *float_depth = depth / (float)0xffffffff;
1358             break;
1359
1360         default:
1361             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1362             return FALSE;
1363     }
1364
1365     return TRUE;
1366 }
1367
1368 /* Do not call while under the GL lock. */
1369 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1370 {
1371     const struct wined3d_resource *resource = &surface->resource;
1372     struct wined3d_device *device = resource->device;
1373     const struct blit_shader *blitter;
1374
1375     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1376             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1377     if (!blitter)
1378     {
1379         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1380         return WINED3DERR_INVALIDCALL;
1381     }
1382
1383     return blitter->depth_fill(device, surface, rect, depth);
1384 }
1385
1386 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1387         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1388 {
1389     struct wined3d_device *device = src_surface->resource.device;
1390
1391     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1392             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1393             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1394         return WINED3DERR_INVALIDCALL;
1395
1396     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1397
1398     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1399             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1400     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1401
1402     return WINED3D_OK;
1403 }
1404
1405 /* Do not call while under the GL lock. */
1406 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1407         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1408         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1409 {
1410     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1411     struct wined3d_device *device = dst_surface->resource.device;
1412     DWORD src_ds_flags, dst_ds_flags;
1413     RECT src_rect, dst_rect;
1414     BOOL scale, convert;
1415
1416     static const DWORD simple_blit = WINEDDBLT_ASYNC
1417             | WINEDDBLT_COLORFILL
1418             | WINEDDBLT_WAIT
1419             | WINEDDBLT_DEPTHFILL
1420             | WINEDDBLT_DONOTWAIT;
1421
1422     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1423             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1424             flags, fx, debug_d3dtexturefiltertype(filter));
1425     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1426
1427     if (fx)
1428     {
1429         TRACE("dwSize %#x.\n", fx->dwSize);
1430         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1431         TRACE("dwROP %#x.\n", fx->dwROP);
1432         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1433         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1434         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1435         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1436         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1437         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1438         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1439         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1440         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1441         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1442         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1443         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1444         TRACE("dwReserved %#x.\n", fx->dwReserved);
1445         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1446         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1447         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1448         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1449         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1450         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1451                 fx->ddckDestColorkey.color_space_low_value,
1452                 fx->ddckDestColorkey.color_space_high_value);
1453         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1454                 fx->ddckSrcColorkey.color_space_low_value,
1455                 fx->ddckSrcColorkey.color_space_high_value);
1456     }
1457
1458     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1459     {
1460         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1461         return WINEDDERR_SURFACEBUSY;
1462     }
1463
1464     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1465
1466     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1467             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1468             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1469             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1470             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1471     {
1472         WARN("The application gave us a bad destination rectangle.\n");
1473         return WINEDDERR_INVALIDRECT;
1474     }
1475
1476     if (src_surface)
1477     {
1478         surface_get_rect(src_surface, src_rect_in, &src_rect);
1479
1480         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1481                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1482                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1483                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1484                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1485         {
1486             WARN("Application gave us bad source rectangle for Blt.\n");
1487             return WINEDDERR_INVALIDRECT;
1488         }
1489     }
1490     else
1491     {
1492         memset(&src_rect, 0, sizeof(src_rect));
1493     }
1494
1495     if (!fx || !(fx->dwDDFX))
1496         flags &= ~WINEDDBLT_DDFX;
1497
1498     if (flags & WINEDDBLT_WAIT)
1499         flags &= ~WINEDDBLT_WAIT;
1500
1501     if (flags & WINEDDBLT_ASYNC)
1502     {
1503         static unsigned int once;
1504
1505         if (!once++)
1506             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1507         flags &= ~WINEDDBLT_ASYNC;
1508     }
1509
1510     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1511     if (flags & WINEDDBLT_DONOTWAIT)
1512     {
1513         static unsigned int once;
1514
1515         if (!once++)
1516             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1517         flags &= ~WINEDDBLT_DONOTWAIT;
1518     }
1519
1520     if (!device->d3d_initialized)
1521     {
1522         WARN("D3D not initialized, using fallback.\n");
1523         goto cpu;
1524     }
1525
1526     /* We want to avoid invalidating the sysmem location for converted
1527      * surfaces, since otherwise we'd have to convert the data back when
1528      * locking them. */
1529     if (dst_surface->flags & SFLAG_CONVERTED)
1530     {
1531         WARN("Converted surface, using CPU blit.\n");
1532         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1533     }
1534
1535     if (flags & ~simple_blit)
1536     {
1537         WARN("Using fallback for complex blit (%#x).\n", flags);
1538         goto fallback;
1539     }
1540
1541     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1542         src_swapchain = src_surface->container.u.swapchain;
1543     else
1544         src_swapchain = NULL;
1545
1546     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1547         dst_swapchain = dst_surface->container.u.swapchain;
1548     else
1549         dst_swapchain = NULL;
1550
1551     /* This isn't strictly needed. FBO blits for example could deal with
1552      * cross-swapchain blits by first downloading the source to a texture
1553      * before switching to the destination context. We just have this here to
1554      * not have to deal with the issue, since cross-swapchain blits should be
1555      * rare. */
1556     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1557     {
1558         FIXME("Using fallback for cross-swapchain blit.\n");
1559         goto fallback;
1560     }
1561
1562     scale = src_surface
1563             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1564             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1565     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1566
1567     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1568     if (src_surface)
1569         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1570     else
1571         src_ds_flags = 0;
1572
1573     if (src_ds_flags || dst_ds_flags)
1574     {
1575         if (flags & WINEDDBLT_DEPTHFILL)
1576         {
1577             float depth;
1578
1579             TRACE("Depth fill.\n");
1580
1581             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1582                 return WINED3DERR_INVALIDCALL;
1583
1584             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1585                 return WINED3D_OK;
1586         }
1587         else
1588         {
1589             /* Accessing depth / stencil surfaces is supposed to fail while in
1590              * a scene, except for fills, which seem to work. */
1591             if (device->inScene)
1592             {
1593                 WARN("Rejecting depth / stencil access while in scene.\n");
1594                 return WINED3DERR_INVALIDCALL;
1595             }
1596
1597             if (src_ds_flags != dst_ds_flags)
1598             {
1599                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1600                 return WINED3DERR_INVALIDCALL;
1601             }
1602
1603             if (src_rect.top || src_rect.left
1604                     || src_rect.bottom != src_surface->resource.height
1605                     || src_rect.right != src_surface->resource.width)
1606             {
1607                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1608                         wine_dbgstr_rect(&src_rect));
1609                 return WINED3DERR_INVALIDCALL;
1610             }
1611
1612             if (dst_rect.top || dst_rect.left
1613                     || dst_rect.bottom != dst_surface->resource.height
1614                     || dst_rect.right != dst_surface->resource.width)
1615             {
1616                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1617                         wine_dbgstr_rect(&src_rect));
1618                 return WINED3DERR_INVALIDCALL;
1619             }
1620
1621             if (scale)
1622             {
1623                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1624                 return WINED3DERR_INVALIDCALL;
1625             }
1626
1627             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1628                 return WINED3D_OK;
1629         }
1630     }
1631     else
1632     {
1633         /* In principle this would apply to depth blits as well, but we don't
1634          * implement those in the CPU blitter at the moment. */
1635         if ((dst_surface->flags & SFLAG_INSYSMEM)
1636                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1637         {
1638             if (scale)
1639                 TRACE("Not doing sysmem blit because of scaling.\n");
1640             else if (convert)
1641                 TRACE("Not doing sysmem blit because of format conversion.\n");
1642             else
1643                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1644         }
1645
1646         if (flags & WINEDDBLT_COLORFILL)
1647         {
1648             struct wined3d_color color;
1649
1650             TRACE("Color fill.\n");
1651
1652             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1653                 goto fallback;
1654
1655             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1656                 return WINED3D_OK;
1657         }
1658         else
1659         {
1660             TRACE("Color blit.\n");
1661
1662             /* Upload */
1663             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1664             {
1665                 if (scale)
1666                     TRACE("Not doing upload because of scaling.\n");
1667                 else if (convert)
1668                     TRACE("Not doing upload because of format conversion.\n");
1669                 else
1670                 {
1671                     POINT dst_point = {dst_rect.left, dst_rect.top};
1672
1673                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1674                     {
1675                         if (!surface_is_offscreen(dst_surface))
1676                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1677                         return WINED3D_OK;
1678                     }
1679                 }
1680             }
1681
1682             /* Use present for back -> front blits. The idea behind this is
1683              * that present is potentially faster than a blit, in particular
1684              * when FBO blits aren't available. Some ddraw applications like
1685              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1686              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1687              * applications can't blit directly to the frontbuffer. */
1688             if (dst_swapchain && dst_swapchain->back_buffers
1689                     && dst_surface == dst_swapchain->front_buffer
1690                     && src_surface == dst_swapchain->back_buffers[0])
1691             {
1692                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1693
1694                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1695
1696                 /* Set the swap effect to COPY, we don't want the backbuffer
1697                  * to become undefined. */
1698                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1699                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1700                 dst_swapchain->desc.swap_effect = swap_effect;
1701
1702                 return WINED3D_OK;
1703             }
1704
1705             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1706                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1707                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1708             {
1709                 TRACE("Using FBO blit.\n");
1710
1711                 surface_blt_fbo(device, filter,
1712                         src_surface, src_surface->draw_binding, &src_rect,
1713                         dst_surface, dst_surface->draw_binding, &dst_rect);
1714                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1715                 return WINED3D_OK;
1716             }
1717
1718             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1719                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1720                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1721             {
1722                 TRACE("Using arbfp blit.\n");
1723
1724                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1725                     return WINED3D_OK;
1726             }
1727         }
1728     }
1729
1730 fallback:
1731
1732     /* Special cases for render targets. */
1733     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1734             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1735     {
1736         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1737                 src_surface, &src_rect, flags, fx, filter)))
1738             return WINED3D_OK;
1739     }
1740
1741 cpu:
1742
1743     /* For the rest call the X11 surface implementation. For render targets
1744      * this should be implemented OpenGL accelerated in BltOverride, other
1745      * blits are rather rare. */
1746     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1747 }
1748
1749 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1750         struct wined3d_surface *render_target)
1751 {
1752     TRACE("surface %p, render_target %p.\n", surface, render_target);
1753
1754     /* TODO: Check surface sizes, pools, etc. */
1755
1756     if (render_target->resource.multisample_type)
1757         return WINED3DERR_INVALIDCALL;
1758
1759     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3DTEXF_POINT);
1760 }
1761
1762 /* Context activation is done by the caller. */
1763 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1764 {
1765     if (surface->flags & SFLAG_DIBSECTION)
1766     {
1767         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1768     }
1769     else
1770     {
1771         if (!surface->resource.heapMemory)
1772             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1773         else if (!(surface->flags & SFLAG_CLIENT))
1774             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1775                     surface, surface->resource.heapMemory, surface->flags);
1776
1777         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1778                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1779     }
1780
1781     ENTER_GL();
1782     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1783     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1784     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1785             surface->resource.size, surface->resource.allocatedMemory));
1786     checkGLcall("glGetBufferSubDataARB");
1787     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1788     checkGLcall("glDeleteBuffersARB");
1789     LEAVE_GL();
1790
1791     surface->pbo = 0;
1792     surface->flags &= ~SFLAG_PBO;
1793 }
1794
1795 /* Do not call while under the GL lock. */
1796 static void surface_unload(struct wined3d_resource *resource)
1797 {
1798     struct wined3d_surface *surface = surface_from_resource(resource);
1799     struct wined3d_renderbuffer_entry *entry, *entry2;
1800     struct wined3d_device *device = resource->device;
1801     const struct wined3d_gl_info *gl_info;
1802     struct wined3d_context *context;
1803
1804     TRACE("surface %p.\n", surface);
1805
1806     if (resource->pool == WINED3DPOOL_DEFAULT)
1807     {
1808         /* Default pool resources are supposed to be destroyed before Reset is called.
1809          * Implicit resources stay however. So this means we have an implicit render target
1810          * or depth stencil. The content may be destroyed, but we still have to tear down
1811          * opengl resources, so we cannot leave early.
1812          *
1813          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1814          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1815          * or the depth stencil into an FBO the texture or render buffer will be removed
1816          * and all flags get lost
1817          */
1818         if (!(surface->flags & SFLAG_PBO))
1819             surface_init_sysmem(surface);
1820         /* We also get here when the ddraw swapchain is destroyed, for example
1821          * for a mode switch. In this case this surface won't necessarily be
1822          * an implicit surface. We have to mark it lost so that the
1823          * application can restore it after the mode switch. */
1824         surface->flags |= SFLAG_LOST;
1825     }
1826     else
1827     {
1828         /* Load the surface into system memory */
1829         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1830         surface_modify_location(surface, surface->draw_binding, FALSE);
1831     }
1832     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1833     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1834     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1835
1836     context = context_acquire(device, NULL);
1837     gl_info = context->gl_info;
1838
1839     /* Destroy PBOs, but load them into real sysmem before */
1840     if (surface->flags & SFLAG_PBO)
1841         surface_remove_pbo(surface, gl_info);
1842
1843     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1844      * all application-created targets the application has to release the surface
1845      * before calling _Reset
1846      */
1847     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1848     {
1849         ENTER_GL();
1850         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1851         LEAVE_GL();
1852         list_remove(&entry->entry);
1853         HeapFree(GetProcessHeap(), 0, entry);
1854     }
1855     list_init(&surface->renderbuffers);
1856     surface->current_renderbuffer = NULL;
1857
1858     ENTER_GL();
1859
1860     /* If we're in a texture, the texture name belongs to the texture.
1861      * Otherwise, destroy it. */
1862     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1863     {
1864         glDeleteTextures(1, &surface->texture_name);
1865         surface->texture_name = 0;
1866         glDeleteTextures(1, &surface->texture_name_srgb);
1867         surface->texture_name_srgb = 0;
1868     }
1869     if (surface->rb_multisample)
1870     {
1871         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1872         surface->rb_multisample = 0;
1873     }
1874     if (surface->rb_resolved)
1875     {
1876         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1877         surface->rb_resolved = 0;
1878     }
1879
1880     LEAVE_GL();
1881
1882     context_release(context);
1883
1884     resource_unload(resource);
1885 }
1886
1887 static const struct wined3d_resource_ops surface_resource_ops =
1888 {
1889     surface_unload,
1890 };
1891
1892 static const struct wined3d_surface_ops surface_ops =
1893 {
1894     surface_private_setup,
1895     surface_realize_palette,
1896     surface_map,
1897     surface_unmap,
1898 };
1899
1900 /*****************************************************************************
1901  * Initializes the GDI surface, aka creates the DIB section we render to
1902  * The DIB section creation is done by calling GetDC, which will create the
1903  * section and releasing the dc to allow the app to use it. The dib section
1904  * will stay until the surface is released
1905  *
1906  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1907  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1908  * avoid confusion in the shared surface code.
1909  *
1910  * Returns:
1911  *  WINED3D_OK on success
1912  *  The return values of called methods on failure
1913  *
1914  *****************************************************************************/
1915 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1916 {
1917     HRESULT hr;
1918
1919     TRACE("surface %p.\n", surface);
1920
1921     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1922     {
1923         ERR("Overlays not yet supported by GDI surfaces.\n");
1924         return WINED3DERR_INVALIDCALL;
1925     }
1926
1927     /* Sysmem textures have memory already allocated - release it,
1928      * this avoids an unnecessary memcpy. */
1929     hr = surface_create_dib_section(surface);
1930     if (SUCCEEDED(hr))
1931     {
1932         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1933         surface->resource.heapMemory = NULL;
1934         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1935     }
1936
1937     /* We don't mind the nonpow2 stuff in GDI. */
1938     surface->pow2Width = surface->resource.width;
1939     surface->pow2Height = surface->resource.height;
1940
1941     return WINED3D_OK;
1942 }
1943
1944 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1945 {
1946     struct wined3d_palette *palette = surface->palette;
1947
1948     TRACE("surface %p.\n", surface);
1949
1950     if (!palette) return;
1951
1952     if (surface->flags & SFLAG_DIBSECTION)
1953     {
1954         RGBQUAD col[256];
1955         unsigned int i;
1956
1957         TRACE("Updating the DC's palette.\n");
1958
1959         for (i = 0; i < 256; ++i)
1960         {
1961             col[i].rgbRed = palette->palents[i].peRed;
1962             col[i].rgbGreen = palette->palents[i].peGreen;
1963             col[i].rgbBlue = palette->palents[i].peBlue;
1964             col[i].rgbReserved = 0;
1965         }
1966         SetDIBColorTable(surface->hDC, 0, 256, col);
1967     }
1968
1969     /* Update the image because of the palette change. Some games like e.g.
1970      * Red Alert call SetEntries a lot to implement fading. */
1971     /* Tell the swapchain to update the screen. */
1972     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1973     {
1974         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1975         if (surface == swapchain->front_buffer)
1976         {
1977             x11_copy_to_screen(swapchain, NULL);
1978         }
1979     }
1980 }
1981
1982 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1983 {
1984     TRACE("surface %p, rect %s, flags %#x.\n",
1985             surface, wine_dbgstr_rect(rect), flags);
1986
1987     if (!(surface->flags & SFLAG_DIBSECTION))
1988     {
1989         /* This happens on gdi surfaces if the application set a user pointer
1990          * and resets it. Recreate the DIB section. */
1991         surface_create_dib_section(surface);
1992         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1993     }
1994 }
1995
1996 static void gdi_surface_unmap(struct wined3d_surface *surface)
1997 {
1998     TRACE("surface %p.\n", surface);
1999
2000     /* Tell the swapchain to update the screen. */
2001     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2002     {
2003         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2004         if (surface == swapchain->front_buffer)
2005         {
2006             x11_copy_to_screen(swapchain, &surface->lockedRect);
2007         }
2008     }
2009
2010     memset(&surface->lockedRect, 0, sizeof(RECT));
2011 }
2012
2013 static const struct wined3d_surface_ops gdi_surface_ops =
2014 {
2015     gdi_surface_private_setup,
2016     gdi_surface_realize_palette,
2017     gdi_surface_map,
2018     gdi_surface_unmap,
2019 };
2020
2021 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2022 {
2023     GLuint *name;
2024     DWORD flag;
2025
2026     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2027
2028     if(srgb)
2029     {
2030         name = &surface->texture_name_srgb;
2031         flag = SFLAG_INSRGBTEX;
2032     }
2033     else
2034     {
2035         name = &surface->texture_name;
2036         flag = SFLAG_INTEXTURE;
2037     }
2038
2039     if (!*name && new_name)
2040     {
2041         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2042          * surface has no texture name yet. See if we can get rid of this. */
2043         if (surface->flags & flag)
2044             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2045         surface_modify_location(surface, flag, FALSE);
2046     }
2047
2048     *name = new_name;
2049     surface_force_reload(surface);
2050 }
2051
2052 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2053 {
2054     TRACE("surface %p, target %#x.\n", surface, target);
2055
2056     if (surface->texture_target != target)
2057     {
2058         if (target == GL_TEXTURE_RECTANGLE_ARB)
2059         {
2060             surface->flags &= ~SFLAG_NORMCOORD;
2061         }
2062         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2063         {
2064             surface->flags |= SFLAG_NORMCOORD;
2065         }
2066     }
2067     surface->texture_target = target;
2068     surface_force_reload(surface);
2069 }
2070
2071 /* Context activation is done by the caller. */
2072 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2073 {
2074     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2075
2076     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2077     {
2078         struct wined3d_texture *texture = surface->container.u.texture;
2079
2080         TRACE("Passing to container (%p).\n", texture);
2081         texture->texture_ops->texture_bind(texture, context, srgb);
2082     }
2083     else
2084     {
2085         if (surface->texture_level)
2086         {
2087             ERR("Standalone surface %p is non-zero texture level %u.\n",
2088                     surface, surface->texture_level);
2089         }
2090
2091         if (srgb)
2092             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2093
2094         ENTER_GL();
2095
2096         if (!surface->texture_name)
2097         {
2098             glGenTextures(1, &surface->texture_name);
2099             checkGLcall("glGenTextures");
2100
2101             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2102
2103             context_bind_texture(context, surface->texture_target, surface->texture_name);
2104             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2105             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2106             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2107             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2108             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2109             checkGLcall("glTexParameteri");
2110         }
2111         else
2112         {
2113             context_bind_texture(context, surface->texture_target, surface->texture_name);
2114         }
2115
2116         LEAVE_GL();
2117     }
2118 }
2119
2120 /* This call just downloads data, the caller is responsible for binding the
2121  * correct texture. */
2122 /* Context activation is done by the caller. */
2123 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2124 {
2125     const struct wined3d_format *format = surface->resource.format;
2126
2127     /* Only support read back of converted P8 surfaces. */
2128     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2129     {
2130         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2131         return;
2132     }
2133
2134     ENTER_GL();
2135
2136     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2137     {
2138         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2139                 surface, surface->texture_level, format->glFormat, format->glType,
2140                 surface->resource.allocatedMemory);
2141
2142         if (surface->flags & SFLAG_PBO)
2143         {
2144             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2145             checkGLcall("glBindBufferARB");
2146             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2147             checkGLcall("glGetCompressedTexImageARB");
2148             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2149             checkGLcall("glBindBufferARB");
2150         }
2151         else
2152         {
2153             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2154                     surface->texture_level, surface->resource.allocatedMemory));
2155             checkGLcall("glGetCompressedTexImageARB");
2156         }
2157
2158         LEAVE_GL();
2159     }
2160     else
2161     {
2162         void *mem;
2163         GLenum gl_format = format->glFormat;
2164         GLenum gl_type = format->glType;
2165         int src_pitch = 0;
2166         int dst_pitch = 0;
2167
2168         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2169         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2170         {
2171             gl_format = GL_ALPHA;
2172             gl_type = GL_UNSIGNED_BYTE;
2173         }
2174
2175         if (surface->flags & SFLAG_NONPOW2)
2176         {
2177             unsigned char alignment = surface->resource.device->surface_alignment;
2178             src_pitch = format->byte_count * surface->pow2Width;
2179             dst_pitch = wined3d_surface_get_pitch(surface);
2180             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2181             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2182         }
2183         else
2184         {
2185             mem = surface->resource.allocatedMemory;
2186         }
2187
2188         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2189                 surface, surface->texture_level, gl_format, gl_type, mem);
2190
2191         if (surface->flags & SFLAG_PBO)
2192         {
2193             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2194             checkGLcall("glBindBufferARB");
2195
2196             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2197             checkGLcall("glGetTexImage");
2198
2199             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2200             checkGLcall("glBindBufferARB");
2201         }
2202         else
2203         {
2204             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2205             checkGLcall("glGetTexImage");
2206         }
2207         LEAVE_GL();
2208
2209         if (surface->flags & SFLAG_NONPOW2)
2210         {
2211             const BYTE *src_data;
2212             BYTE *dst_data;
2213             UINT y;
2214             /*
2215              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2216              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2217              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2218              *
2219              * We're doing this...
2220              *
2221              * instead of boxing the texture :
2222              * |<-texture width ->|  -->pow2width|   /\
2223              * |111111111111111111|              |   |
2224              * |222 Texture 222222| boxed empty  | texture height
2225              * |3333 Data 33333333|              |   |
2226              * |444444444444444444|              |   \/
2227              * -----------------------------------   |
2228              * |     boxed  empty | boxed empty  | pow2height
2229              * |                  |              |   \/
2230              * -----------------------------------
2231              *
2232              *
2233              * we're repacking the data to the expected texture width
2234              *
2235              * |<-texture width ->|  -->pow2width|   /\
2236              * |111111111111111111222222222222222|   |
2237              * |222333333333333333333444444444444| texture height
2238              * |444444                           |   |
2239              * |                                 |   \/
2240              * |                                 |   |
2241              * |            empty                | pow2height
2242              * |                                 |   \/
2243              * -----------------------------------
2244              *
2245              * == is the same as
2246              *
2247              * |<-texture width ->|    /\
2248              * |111111111111111111|
2249              * |222222222222222222|texture height
2250              * |333333333333333333|
2251              * |444444444444444444|    \/
2252              * --------------------
2253              *
2254              * this also means that any references to allocatedMemory should work with the data as if were a
2255              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2256              *
2257              * internally the texture is still stored in a boxed format so any references to textureName will
2258              * get a boxed texture with width pow2width and not a texture of width resource.width.
2259              *
2260              * Performance should not be an issue, because applications normally do not lock the surfaces when
2261              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2262              * and doesn't have to be re-read. */
2263             src_data = mem;
2264             dst_data = surface->resource.allocatedMemory;
2265             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2266             for (y = 1; y < surface->resource.height; ++y)
2267             {
2268                 /* skip the first row */
2269                 src_data += src_pitch;
2270                 dst_data += dst_pitch;
2271                 memcpy(dst_data, src_data, dst_pitch);
2272             }
2273
2274             HeapFree(GetProcessHeap(), 0, mem);
2275         }
2276     }
2277
2278     /* Surface has now been downloaded */
2279     surface->flags |= SFLAG_INSYSMEM;
2280 }
2281
2282 /* This call just uploads data, the caller is responsible for binding the
2283  * correct texture. */
2284 /* Context activation is done by the caller. */
2285 static void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2286         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2287         BOOL srgb, const struct wined3d_bo_address *data)
2288 {
2289     UINT update_w = src_rect->right - src_rect->left;
2290     UINT update_h = src_rect->bottom - src_rect->top;
2291
2292     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2293             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2294             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2295
2296     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2297         update_h *= format->heightscale;
2298
2299     ENTER_GL();
2300
2301     if (data->buffer_object)
2302     {
2303         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2304         checkGLcall("glBindBufferARB");
2305     }
2306
2307     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2308     {
2309         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2310         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2311         const BYTE *addr = data->addr;
2312         GLenum internal;
2313
2314         addr += (src_rect->top / format->block_height) * src_pitch;
2315         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2316
2317         if (srgb)
2318             internal = format->glGammaInternal;
2319         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2320             internal = format->rtInternal;
2321         else
2322             internal = format->glInternal;
2323
2324         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2325                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2326                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2327
2328         if (row_length == src_pitch)
2329         {
2330             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2331                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2332         }
2333         else
2334         {
2335             UINT row, y;
2336
2337             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2338              * can't use the unpack row length like below. */
2339             for (row = 0, y = dst_point->y; row < row_count; ++row)
2340             {
2341                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2342                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2343                 y += format->block_height;
2344                 addr += src_pitch;
2345             }
2346         }
2347         checkGLcall("glCompressedTexSubImage2DARB");
2348     }
2349     else
2350     {
2351         const BYTE *addr = data->addr;
2352
2353         addr += src_rect->top * src_pitch;
2354         addr += src_rect->left * format->byte_count;
2355
2356         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2357                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2358                 update_w, update_h, format->glFormat, format->glType, addr);
2359
2360         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2361         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2362                 update_w, update_h, format->glFormat, format->glType, addr);
2363         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2364         checkGLcall("glTexSubImage2D");
2365     }
2366
2367     if (data->buffer_object)
2368     {
2369         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2370         checkGLcall("glBindBufferARB");
2371     }
2372
2373     LEAVE_GL();
2374
2375     if (wined3d_settings.strict_draw_ordering)
2376         wglFlush();
2377
2378     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2379     {
2380         struct wined3d_device *device = surface->resource.device;
2381         unsigned int i;
2382
2383         for (i = 0; i < device->context_count; ++i)
2384         {
2385             context_surface_update(device->contexts[i], surface);
2386         }
2387     }
2388 }
2389
2390 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2391         struct wined3d_surface *src_surface, const RECT *src_rect)
2392 {
2393     const struct wined3d_format *src_format;
2394     const struct wined3d_format *dst_format;
2395     const struct wined3d_gl_info *gl_info;
2396     struct wined3d_context *context;
2397     struct wined3d_bo_address data;
2398     struct wined3d_format format;
2399     UINT update_w, update_h;
2400     CONVERT_TYPES convert;
2401     UINT dst_w, dst_h;
2402     UINT src_w, src_h;
2403     UINT src_pitch;
2404     POINT p;
2405     RECT r;
2406
2407     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2408             dst_surface, wine_dbgstr_point(dst_point),
2409             src_surface, wine_dbgstr_rect(src_rect));
2410
2411     src_format = src_surface->resource.format;
2412     dst_format = dst_surface->resource.format;
2413
2414     if (src_format->id != dst_format->id)
2415     {
2416         WARN("Source and destination surfaces should have the same format.\n");
2417         return WINED3DERR_INVALIDCALL;
2418     }
2419
2420     if (!dst_point)
2421     {
2422         p.x = 0;
2423         p.y = 0;
2424         dst_point = &p;
2425     }
2426     else if (dst_point->x < 0 || dst_point->y < 0)
2427     {
2428         WARN("Invalid destination point.\n");
2429         return WINED3DERR_INVALIDCALL;
2430     }
2431
2432     if (!src_rect)
2433     {
2434         r.left = 0;
2435         r.top = 0;
2436         r.right = src_surface->resource.width;
2437         r.bottom = src_surface->resource.height;
2438         src_rect = &r;
2439     }
2440     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2441             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2442     {
2443         WARN("Invalid source rectangle.\n");
2444         return WINED3DERR_INVALIDCALL;
2445     }
2446
2447     src_w = src_surface->resource.width;
2448     src_h = src_surface->resource.height;
2449
2450     dst_w = dst_surface->resource.width;
2451     dst_h = dst_surface->resource.height;
2452
2453     update_w = src_rect->right - src_rect->left;
2454     update_h = src_rect->bottom - src_rect->top;
2455
2456     if (update_w > dst_w || dst_point->x > dst_w - update_w
2457             || update_h > dst_h || dst_point->y > dst_h - update_h)
2458     {
2459         WARN("Destination out of bounds.\n");
2460         return WINED3DERR_INVALIDCALL;
2461     }
2462
2463     /* NPOT block sizes would be silly. */
2464     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2465             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2466             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2467     {
2468         WARN("Update rect not block-aligned.\n");
2469         return WINED3DERR_INVALIDCALL;
2470     }
2471
2472     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2473     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2474     if (convert != NO_CONVERSION || format.convert)
2475     {
2476         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2477         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3DTEXF_POINT);
2478     }
2479
2480     context = context_acquire(dst_surface->resource.device, NULL);
2481     gl_info = context->gl_info;
2482
2483     /* Only load the surface for partial updates. For newly allocated texture
2484      * the texture wouldn't be the current location, and we'd upload zeroes
2485      * just to overwrite them again. */
2486     if (update_w == dst_w && update_h == dst_h)
2487         surface_prepare_texture(dst_surface, context, FALSE);
2488     else
2489         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2490     surface_bind(dst_surface, context, FALSE);
2491
2492     data.buffer_object = src_surface->pbo;
2493     data.addr = src_surface->resource.allocatedMemory;
2494     src_pitch = wined3d_surface_get_pitch(src_surface);
2495
2496     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2497
2498     invalidate_active_texture(dst_surface->resource.device, context);
2499
2500     context_release(context);
2501
2502     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2503     return WINED3D_OK;
2504 }
2505
2506 /* This call just allocates the texture, the caller is responsible for binding
2507  * the correct texture. */
2508 /* Context activation is done by the caller. */
2509 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2510         const struct wined3d_format *format, BOOL srgb)
2511 {
2512     BOOL enable_client_storage = FALSE;
2513     GLsizei width = surface->pow2Width;
2514     GLsizei height = surface->pow2Height;
2515     const BYTE *mem = NULL;
2516     GLenum internal;
2517
2518     if (srgb)
2519     {
2520         internal = format->glGammaInternal;
2521     }
2522     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2523     {
2524         internal = format->rtInternal;
2525     }
2526     else
2527     {
2528         internal = format->glInternal;
2529     }
2530
2531     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2532
2533     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2534             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2535             internal, width, height, format->glFormat, format->glType);
2536
2537     ENTER_GL();
2538
2539     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2540     {
2541         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2542                 || !surface->resource.allocatedMemory)
2543         {
2544             /* In some cases we want to disable client storage.
2545              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2546              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2547              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2548              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2549              */
2550             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2551             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2552             surface->flags &= ~SFLAG_CLIENT;
2553             enable_client_storage = TRUE;
2554         }
2555         else
2556         {
2557             surface->flags |= SFLAG_CLIENT;
2558
2559             /* Point OpenGL to our allocated texture memory. Do not use
2560              * resource.allocatedMemory here because it might point into a
2561              * PBO. Instead use heapMemory, but get the alignment right. */
2562             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2563                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2564         }
2565     }
2566
2567     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2568     {
2569         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2570                 internal, width, height, 0, surface->resource.size, mem));
2571         checkGLcall("glCompressedTexImage2DARB");
2572     }
2573     else
2574     {
2575         glTexImage2D(surface->texture_target, surface->texture_level,
2576                 internal, width, height, 0, format->glFormat, format->glType, mem);
2577         checkGLcall("glTexImage2D");
2578     }
2579
2580     if(enable_client_storage) {
2581         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2582         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2583     }
2584     LEAVE_GL();
2585 }
2586
2587 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2588  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2589 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2590 /* GL locking is done by the caller */
2591 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2592 {
2593     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2594     struct wined3d_renderbuffer_entry *entry;
2595     GLuint renderbuffer = 0;
2596     unsigned int src_width, src_height;
2597     unsigned int width, height;
2598
2599     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2600     {
2601         width = rt->pow2Width;
2602         height = rt->pow2Height;
2603     }
2604     else
2605     {
2606         width = surface->pow2Width;
2607         height = surface->pow2Height;
2608     }
2609
2610     src_width = surface->pow2Width;
2611     src_height = surface->pow2Height;
2612
2613     /* A depth stencil smaller than the render target is not valid */
2614     if (width > src_width || height > src_height) return;
2615
2616     /* Remove any renderbuffer set if the sizes match */
2617     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2618             || (width == src_width && height == src_height))
2619     {
2620         surface->current_renderbuffer = NULL;
2621         return;
2622     }
2623
2624     /* Look if we've already got a renderbuffer of the correct dimensions */
2625     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2626     {
2627         if (entry->width == width && entry->height == height)
2628         {
2629             renderbuffer = entry->id;
2630             surface->current_renderbuffer = entry;
2631             break;
2632         }
2633     }
2634
2635     if (!renderbuffer)
2636     {
2637         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2638         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2639         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2640                 surface->resource.format->glInternal, width, height);
2641
2642         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2643         entry->width = width;
2644         entry->height = height;
2645         entry->id = renderbuffer;
2646         list_add_head(&surface->renderbuffers, &entry->entry);
2647
2648         surface->current_renderbuffer = entry;
2649     }
2650
2651     checkGLcall("set_compatible_renderbuffer");
2652 }
2653
2654 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2655 {
2656     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2657
2658     TRACE("surface %p.\n", surface);
2659
2660     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2661     {
2662         ERR("Surface %p is not on a swapchain.\n", surface);
2663         return GL_NONE;
2664     }
2665
2666     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2667     {
2668         if (swapchain->render_to_fbo)
2669         {
2670             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2671             return GL_COLOR_ATTACHMENT0;
2672         }
2673         TRACE("Returning GL_BACK\n");
2674         return GL_BACK;
2675     }
2676     else if (surface == swapchain->front_buffer)
2677     {
2678         TRACE("Returning GL_FRONT\n");
2679         return GL_FRONT;
2680     }
2681
2682     FIXME("Higher back buffer, returning GL_BACK\n");
2683     return GL_BACK;
2684 }
2685
2686 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2687 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2688 {
2689     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2690
2691     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2692         /* No partial locking for textures yet. */
2693         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2694
2695     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2696     if (dirty_rect)
2697     {
2698         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2699         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2700         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2701         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2702     }
2703     else
2704     {
2705         surface->dirtyRect.left = 0;
2706         surface->dirtyRect.top = 0;
2707         surface->dirtyRect.right = surface->resource.width;
2708         surface->dirtyRect.bottom = surface->resource.height;
2709     }
2710
2711     /* if the container is a texture then mark it dirty. */
2712     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2713     {
2714         TRACE("Passing to container.\n");
2715         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2716     }
2717 }
2718
2719 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2720 {
2721     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2722     BOOL ck_changed;
2723
2724     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2725
2726     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2727     {
2728         ERR("Not supported on scratch surfaces.\n");
2729         return WINED3DERR_INVALIDCALL;
2730     }
2731
2732     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2733
2734     /* Reload if either the texture and sysmem have different ideas about the
2735      * color key, or the actual key values changed. */
2736     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2737             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2738             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2739     {
2740         TRACE("Reloading because of color keying\n");
2741         /* To perform the color key conversion we need a sysmem copy of
2742          * the surface. Make sure we have it. */
2743
2744         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2745         /* Make sure the texture is reloaded because of the color key change,
2746          * this kills performance though :( */
2747         /* TODO: This is not necessarily needed with hw palettized texture support. */
2748         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2749         /* Switching color keying on / off may change the internal format. */
2750         if (ck_changed)
2751             surface_force_reload(surface);
2752     }
2753     else if (!(surface->flags & flag))
2754     {
2755         TRACE("Reloading because surface is dirty.\n");
2756     }
2757     else
2758     {
2759         TRACE("surface is already in texture\n");
2760         return WINED3D_OK;
2761     }
2762
2763     /* No partial locking for textures yet. */
2764     surface_load_location(surface, flag, NULL);
2765     surface_evict_sysmem(surface);
2766
2767     return WINED3D_OK;
2768 }
2769
/* See also float_16_to_32() in wined3d_private.h */
/* Converts the 32 bit float pointed to by "in" to a 16 bit half float bit
 * pattern (1 sign bit, 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) yields 0x0000, NaN yields 0x7c01, infinities keep
 * their sign. Values too large for a half float become infinity, values too
 * small are flushed towards denormals / zero. The mantissa is rounded to
 * nearest, away from zero. */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), so that the integer part holds the
     * implicit leading one plus the 10 explicit mantissa bits. */
    if (tmp < 1024.0f)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < 1024.0f);
    }
    else if (tmp >= 2048.0f)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= 2048.0f);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding may carry out of the 11 bit mantissa (2047.5 -> 2048). Move
     * the carry into the exponent; otherwise masking with 0x3ff below would
     * silently drop it and halve the encoded value. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2832
2833 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2834 {
2835     ULONG refcount;
2836
2837     TRACE("Surface %p, container %p of type %#x.\n",
2838             surface, surface->container.u.base, surface->container.type);
2839
2840     switch (surface->container.type)
2841     {
2842         case WINED3D_CONTAINER_TEXTURE:
2843             return wined3d_texture_incref(surface->container.u.texture);
2844
2845         case WINED3D_CONTAINER_SWAPCHAIN:
2846             return wined3d_swapchain_incref(surface->container.u.swapchain);
2847
2848         default:
2849             ERR("Unhandled container type %#x.\n", surface->container.type);
2850         case WINED3D_CONTAINER_NONE:
2851             break;
2852     }
2853
2854     refcount = InterlockedIncrement(&surface->resource.ref);
2855     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2856
2857     return refcount;
2858 }
2859
2860 /* Do not call while under the GL lock. */
2861 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2862 {
2863     ULONG refcount;
2864
2865     TRACE("Surface %p, container %p of type %#x.\n",
2866             surface, surface->container.u.base, surface->container.type);
2867
2868     switch (surface->container.type)
2869     {
2870         case WINED3D_CONTAINER_TEXTURE:
2871             return wined3d_texture_decref(surface->container.u.texture);
2872
2873         case WINED3D_CONTAINER_SWAPCHAIN:
2874             return wined3d_swapchain_decref(surface->container.u.swapchain);
2875
2876         default:
2877             ERR("Unhandled container type %#x.\n", surface->container.type);
2878         case WINED3D_CONTAINER_NONE:
2879             break;
2880     }
2881
2882     refcount = InterlockedDecrement(&surface->resource.ref);
2883     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2884
2885     if (!refcount)
2886     {
2887         surface_cleanup(surface);
2888         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2889
2890         TRACE("Destroyed surface %p.\n", surface);
2891         HeapFree(GetProcessHeap(), 0, surface);
2892     }
2893
2894     return refcount;
2895 }
2896
2897 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2898 {
2899     return resource_set_priority(&surface->resource, priority);
2900 }
2901
2902 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2903 {
2904     return resource_get_priority(&surface->resource);
2905 }
2906
2907 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2908 {
2909     TRACE("surface %p.\n", surface);
2910
2911     if (!surface->resource.device->d3d_initialized)
2912     {
2913         ERR("D3D not initialized.\n");
2914         return;
2915     }
2916
2917     surface_internal_preload(surface, SRGB_ANY);
2918 }
2919
2920 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2921 {
2922     TRACE("surface %p.\n", surface);
2923
2924     return surface->resource.parent;
2925 }
2926
2927 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2928 {
2929     TRACE("surface %p.\n", surface);
2930
2931     return &surface->resource;
2932 }
2933
2934 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2935 {
2936     TRACE("surface %p, flags %#x.\n", surface, flags);
2937
2938     switch (flags)
2939     {
2940         case WINEDDGBS_CANBLT:
2941         case WINEDDGBS_ISBLTDONE:
2942             return WINED3D_OK;
2943
2944         default:
2945             return WINED3DERR_INVALIDCALL;
2946     }
2947 }
2948
2949 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2950 {
2951     TRACE("surface %p, flags %#x.\n", surface, flags);
2952
2953     /* XXX: DDERR_INVALIDSURFACETYPE */
2954
2955     switch (flags)
2956     {
2957         case WINEDDGFS_CANFLIP:
2958         case WINEDDGFS_ISFLIPDONE:
2959             return WINED3D_OK;
2960
2961         default:
2962             return WINED3DERR_INVALIDCALL;
2963     }
2964 }
2965
2966 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2967 {
2968     TRACE("surface %p.\n", surface);
2969
2970     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2971     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2972 }
2973
2974 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2975 {
2976     TRACE("surface %p.\n", surface);
2977
2978     surface->flags &= ~SFLAG_LOST;
2979     return WINED3D_OK;
2980 }
2981
2982 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2983 {
2984     TRACE("surface %p, palette %p.\n", surface, palette);
2985
2986     if (surface->palette == palette)
2987     {
2988         TRACE("Nop palette change.\n");
2989         return WINED3D_OK;
2990     }
2991
2992     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2993         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2994
2995     surface->palette = palette;
2996
2997     if (palette)
2998     {
2999         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3000             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3001
3002         surface->surface_ops->surface_realize_palette(surface);
3003     }
3004
3005     return WINED3D_OK;
3006 }
3007
3008 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3009         DWORD flags, const struct wined3d_color_key *color_key)
3010 {
3011     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3012
3013     if (flags & WINEDDCKEY_COLORSPACE)
3014     {
3015         FIXME(" colorkey value not supported (%08x) !\n", flags);
3016         return WINED3DERR_INVALIDCALL;
3017     }
3018
3019     /* Dirtify the surface, but only if a key was changed. */
3020     if (color_key)
3021     {
3022         switch (flags & ~WINEDDCKEY_COLORSPACE)
3023         {
3024             case WINEDDCKEY_DESTBLT:
3025                 surface->dst_blt_color_key = *color_key;
3026                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3027                 break;
3028
3029             case WINEDDCKEY_DESTOVERLAY:
3030                 surface->dst_overlay_color_key = *color_key;
3031                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3032                 break;
3033
3034             case WINEDDCKEY_SRCOVERLAY:
3035                 surface->src_overlay_color_key = *color_key;
3036                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3037                 break;
3038
3039             case WINEDDCKEY_SRCBLT:
3040                 surface->src_blt_color_key = *color_key;
3041                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3042                 break;
3043         }
3044     }
3045     else
3046     {
3047         switch (flags & ~WINEDDCKEY_COLORSPACE)
3048         {
3049             case WINEDDCKEY_DESTBLT:
3050                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3051                 break;
3052
3053             case WINEDDCKEY_DESTOVERLAY:
3054                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3055                 break;
3056
3057             case WINEDDCKEY_SRCOVERLAY:
3058                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3059                 break;
3060
3061             case WINEDDCKEY_SRCBLT:
3062                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3063                 break;
3064         }
3065     }
3066
3067     return WINED3D_OK;
3068 }
3069
3070 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3071 {
3072     TRACE("surface %p.\n", surface);
3073
3074     return surface->palette;
3075 }
3076
3077 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3078 {
3079     const struct wined3d_format *format = surface->resource.format;
3080     DWORD pitch;
3081
3082     TRACE("surface %p.\n", surface);
3083
3084     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3085     {
3086         /* Since compressed formats are block based, pitch means the amount of
3087          * bytes to the next row of block rather than the next row of pixels. */
3088         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3089         pitch = row_block_count * format->block_byte_count;
3090     }
3091     else
3092     {
3093         unsigned char alignment = surface->resource.device->surface_alignment;
3094         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3095         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3096     }
3097
3098     TRACE("Returning %u.\n", pitch);
3099
3100     return pitch;
3101 }
3102
3103 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3104 {
3105     TRACE("surface %p, mem %p.\n", surface, mem);
3106
3107     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3108     {
3109         WARN("Surface is locked or the DC is in use.\n");
3110         return WINED3DERR_INVALIDCALL;
3111     }
3112
3113     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3114     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3115     {
3116         ERR("Not supported on render targets.\n");
3117         return WINED3DERR_INVALIDCALL;
3118     }
3119
3120     if (mem && mem != surface->resource.allocatedMemory)
3121     {
3122         void *release = NULL;
3123
3124         /* Do I have to copy the old surface content? */
3125         if (surface->flags & SFLAG_DIBSECTION)
3126         {
3127             DeleteDC(surface->hDC);
3128             DeleteObject(surface->dib.DIBsection);
3129             surface->dib.bitmap_data = NULL;
3130             surface->resource.allocatedMemory = NULL;
3131             surface->hDC = NULL;
3132             surface->flags &= ~SFLAG_DIBSECTION;
3133         }
3134         else if (!(surface->flags & SFLAG_USERPTR))
3135         {
3136             release = surface->resource.heapMemory;
3137             surface->resource.heapMemory = NULL;
3138         }
3139         surface->resource.allocatedMemory = mem;
3140         surface->flags |= SFLAG_USERPTR;
3141
3142         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3143         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3144
3145         /* For client textures OpenGL has to be notified. */
3146         if (surface->flags & SFLAG_CLIENT)
3147             surface_release_client_storage(surface);
3148
3149         /* Now free the old memory if any. */
3150         HeapFree(GetProcessHeap(), 0, release);
3151     }
3152     else if (surface->flags & SFLAG_USERPTR)
3153     {
3154         /* HeapMemory should be NULL already. */
3155         if (surface->resource.heapMemory)
3156             ERR("User pointer surface has heap memory allocated.\n");
3157
3158         if (!mem)
3159         {
3160             surface->resource.allocatedMemory = NULL;
3161             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3162
3163             if (surface->flags & SFLAG_CLIENT)
3164                 surface_release_client_storage(surface);
3165
3166             surface_prepare_system_memory(surface);
3167         }
3168
3169         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3170     }
3171
3172     return WINED3D_OK;
3173 }
3174
3175 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3176 {
3177     LONG w, h;
3178
3179     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3180
3181     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3182     {
3183         WARN("Not an overlay surface.\n");
3184         return WINEDDERR_NOTAOVERLAYSURFACE;
3185     }
3186
3187     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3188     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3189     surface->overlay_destrect.left = x;
3190     surface->overlay_destrect.top = y;
3191     surface->overlay_destrect.right = x + w;
3192     surface->overlay_destrect.bottom = y + h;
3193
3194     surface_draw_overlay(surface);
3195
3196     return WINED3D_OK;
3197 }
3198
3199 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3200 {
3201     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3202
3203     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3204     {
3205         TRACE("Not an overlay surface.\n");
3206         return WINEDDERR_NOTAOVERLAYSURFACE;
3207     }
3208
3209     if (!surface->overlay_dest)
3210     {
3211         TRACE("Overlay not visible.\n");
3212         *x = 0;
3213         *y = 0;
3214         return WINEDDERR_OVERLAYNOTVISIBLE;
3215     }
3216
3217     *x = surface->overlay_destrect.left;
3218     *y = surface->overlay_destrect.top;
3219
3220     TRACE("Returning position %d, %d.\n", *x, *y);
3221
3222     return WINED3D_OK;
3223 }
3224
3225 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3226         DWORD flags, struct wined3d_surface *ref)
3227 {
3228     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3229
3230     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3231     {
3232         TRACE("Not an overlay surface.\n");
3233         return WINEDDERR_NOTAOVERLAYSURFACE;
3234     }
3235
3236     return WINED3D_OK;
3237 }
3238
3239 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3240         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3241 {
3242     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3243             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3244
3245     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3246     {
3247         WARN("Not an overlay surface.\n");
3248         return WINEDDERR_NOTAOVERLAYSURFACE;
3249     }
3250     else if (!dst_surface)
3251     {
3252         WARN("Dest surface is NULL.\n");
3253         return WINED3DERR_INVALIDCALL;
3254     }
3255
3256     if (src_rect)
3257     {
3258         surface->overlay_srcrect = *src_rect;
3259     }
3260     else
3261     {
3262         surface->overlay_srcrect.left = 0;
3263         surface->overlay_srcrect.top = 0;
3264         surface->overlay_srcrect.right = surface->resource.width;
3265         surface->overlay_srcrect.bottom = surface->resource.height;
3266     }
3267
3268     if (dst_rect)
3269     {
3270         surface->overlay_destrect = *dst_rect;
3271     }
3272     else
3273     {
3274         surface->overlay_destrect.left = 0;
3275         surface->overlay_destrect.top = 0;
3276         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3277         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3278     }
3279
3280     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3281     {
3282         surface->overlay_dest = NULL;
3283         list_remove(&surface->overlay_entry);
3284     }
3285
3286     if (flags & WINEDDOVER_SHOW)
3287     {
3288         if (surface->overlay_dest != dst_surface)
3289         {
3290             surface->overlay_dest = dst_surface;
3291             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3292         }
3293     }
3294     else if (flags & WINEDDOVER_HIDE)
3295     {
3296         /* tests show that the rectangles are erased on hide */
3297         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3298         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3299         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3300         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3301         surface->overlay_dest = NULL;
3302     }
3303
3304     surface_draw_overlay(surface);
3305
3306     return WINED3D_OK;
3307 }
3308
3309 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3310 {
3311     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3312
3313     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3314
3315     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3316     {
3317         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3318         return WINED3DERR_INVALIDCALL;
3319     }
3320
3321     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3322             surface->pow2Width, surface->pow2Height);
3323     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3324     surface->resource.format = format;
3325
3326     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3327     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3328             format->glFormat, format->glInternal, format->glType);
3329
3330     return WINED3D_OK;
3331 }
3332
3333 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3334         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3335 {
3336     unsigned short *dst_s;
3337     const float *src_f;
3338     unsigned int x, y;
3339
3340     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3341
3342     for (y = 0; y < h; ++y)
3343     {
3344         src_f = (const float *)(src + y * pitch_in);
3345         dst_s = (unsigned short *) (dst + y * pitch_out);
3346         for (x = 0; x < w; ++x)
3347         {
3348             dst_s[x] = float_32_to_16(src_f + x);
3349         }
3350     }
3351 }
3352
3353 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3354         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3355 {
3356     static const unsigned char convert_5to8[] =
3357     {
3358         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3359         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3360         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3361         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3362     };
3363     static const unsigned char convert_6to8[] =
3364     {
3365         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3366         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3367         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3368         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3369         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3370         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3371         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3372         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3373     };
3374     unsigned int x, y;
3375
3376     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3377
3378     for (y = 0; y < h; ++y)
3379     {
3380         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3381         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3382         for (x = 0; x < w; ++x)
3383         {
3384             WORD pixel = src_line[x];
3385             dst_line[x] = 0xff000000
3386                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3387                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3388                     | convert_5to8[(pixel & 0x001f)];
3389         }
3390     }
3391 }
3392
3393 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3394  * in both cases we're just setting the X / Alpha channel to 0xff. */
3395 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3396         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3397 {
3398     unsigned int x, y;
3399
3400     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3401
3402     for (y = 0; y < h; ++y)
3403     {
3404         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3405         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3406
3407         for (x = 0; x < w; ++x)
3408         {
3409             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3410         }
3411     }
3412 }
3413
/* Clamp x to the range 0..255 and return it as a byte. */
static inline BYTE cliptobyte(int x)
{
    if (x < 0)
        return (BYTE)0;
    if (x > 255)
        return (BYTE)255;
    return (BYTE)x;
}
3418
3419 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3420         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3421 {
3422     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3423     unsigned int x, y;
3424
3425     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3426
3427     for (y = 0; y < h; ++y)
3428     {
3429         const BYTE *src_line = src + y * pitch_in;
3430         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3431         for (x = 0; x < w; ++x)
3432         {
3433             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3434              *     C = Y - 16; D = U - 128; E = V - 128;
3435              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3436              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3437              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3438              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3439              * U and V are shared between the pixels. */
3440             if (!(x & 1)) /* For every even pixel, read new U and V. */
3441             {
3442                 d = (int) src_line[1] - 128;
3443                 e = (int) src_line[3] - 128;
3444                 r2 = 409 * e + 128;
3445                 g2 = - 100 * d - 208 * e + 128;
3446                 b2 = 516 * d + 128;
3447             }
3448             c2 = 298 * ((int) src_line[0] - 16);
3449             dst_line[x] = 0xff000000
3450                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3451                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3452                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3453                 /* Scale RGB values to 0..255 range,
3454                  * then clip them if still not in range (may be negative),
3455                  * then shift them within DWORD if necessary. */
3456             src_line += 2;
3457         }
3458     }
3459 }
3460
3461 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3462         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3463 {
3464     unsigned int x, y;
3465     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3466
3467     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3468
3469     for (y = 0; y < h; ++y)
3470     {
3471         const BYTE *src_line = src + y * pitch_in;
3472         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3473         for (x = 0; x < w; ++x)
3474         {
3475             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3476              *     C = Y - 16; D = U - 128; E = V - 128;
3477              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3478              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3479              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3480              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3481              * U and V are shared between the pixels. */
3482             if (!(x & 1)) /* For every even pixel, read new U and V. */
3483             {
3484                 d = (int) src_line[1] - 128;
3485                 e = (int) src_line[3] - 128;
3486                 r2 = 409 * e + 128;
3487                 g2 = - 100 * d - 208 * e + 128;
3488                 b2 = 516 * d + 128;
3489             }
3490             c2 = 298 * ((int) src_line[0] - 16);
3491             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3492                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3493                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3494                 /* Scale RGB values to 0..255 range,
3495                  * then clip them if still not in range (may be negative),
3496                  * then shift them within DWORD if necessary. */
3497             src_line += 2;
3498         }
3499     }
3500 }
3501
3502 struct d3dfmt_convertor_desc
3503 {
3504     enum wined3d_format_id from, to;
3505     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3506 };
3507
3508 static const struct d3dfmt_convertor_desc convertors[] =
3509 {
3510     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3511     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3512     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3513     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3514     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3515     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3516 };
3517
3518 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3519         enum wined3d_format_id to)
3520 {
3521     unsigned int i;
3522
3523     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3524     {
3525         if (convertors[i].from == from && convertors[i].to == to)
3526             return &convertors[i];
3527     }
3528
3529     return NULL;
3530 }
3531
3532 /*****************************************************************************
3533  * surface_convert_format
3534  *
3535  * Creates a duplicate of a surface in a different format. Is used by Blt to
3536  * blit between surfaces with different formats.
3537  *
3538  * Parameters
3539  *  source: Source surface
3540  *  fmt: Requested destination format
3541  *
3542  *****************************************************************************/
3543 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3544 {
3545     struct wined3d_mapped_rect src_map, dst_map;
3546     const struct d3dfmt_convertor_desc *conv;
3547     struct wined3d_surface *ret = NULL;
3548     HRESULT hr;
3549
3550     conv = find_convertor(source->resource.format->id, to_fmt);
3551     if (!conv)
3552     {
3553         FIXME("Cannot find a conversion function from format %s to %s.\n",
3554                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3555         return NULL;
3556     }
3557
3558     wined3d_surface_create(source->resource.device, source->resource.width,
3559             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3DPOOL_SCRATCH,
3560             WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3561             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3562             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3563     if (!ret)
3564     {
3565         ERR("Failed to create a destination surface for conversion.\n");
3566         return NULL;
3567     }
3568
3569     memset(&src_map, 0, sizeof(src_map));
3570     memset(&dst_map, 0, sizeof(dst_map));
3571
3572     hr = wined3d_surface_map(source, &src_map, NULL, WINED3DLOCK_READONLY);
3573     if (FAILED(hr))
3574     {
3575         ERR("Failed to lock the source surface.\n");
3576         wined3d_surface_decref(ret);
3577         return NULL;
3578     }
3579     hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3DLOCK_READONLY);
3580     if (FAILED(hr))
3581     {
3582         ERR("Failed to lock the destination surface.\n");
3583         wined3d_surface_unmap(source);
3584         wined3d_surface_decref(ret);
3585         return NULL;
3586     }
3587
3588     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3589             source->resource.width, source->resource.height);
3590
3591     wined3d_surface_unmap(ret);
3592     wined3d_surface_unmap(source);
3593
3594     return ret;
3595 }
3596
3597 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3598         unsigned int bpp, UINT pitch, DWORD color)
3599 {
3600     BYTE *first;
3601     int x, y;
3602
3603     /* Do first row */
3604
3605 #define COLORFILL_ROW(type) \
3606 do { \
3607     type *d = (type *)buf; \
3608     for (x = 0; x < width; ++x) \
3609         d[x] = (type)color; \
3610 } while(0)
3611
3612     switch (bpp)
3613     {
3614         case 1:
3615             COLORFILL_ROW(BYTE);
3616             break;
3617
3618         case 2:
3619             COLORFILL_ROW(WORD);
3620             break;
3621
3622         case 3:
3623         {
3624             BYTE *d = buf;
3625             for (x = 0; x < width; ++x, d += 3)
3626             {
3627                 d[0] = (color      ) & 0xFF;
3628                 d[1] = (color >>  8) & 0xFF;
3629                 d[2] = (color >> 16) & 0xFF;
3630             }
3631             break;
3632         }
3633         case 4:
3634             COLORFILL_ROW(DWORD);
3635             break;
3636
3637         default:
3638             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3639             return WINED3DERR_NOTAVAILABLE;
3640     }
3641
3642 #undef COLORFILL_ROW
3643
3644     /* Now copy first row. */
3645     first = buf;
3646     for (y = 1; y < height; ++y)
3647     {
3648         buf += pitch;
3649         memcpy(buf, first, width * bpp);
3650     }
3651
3652     return WINED3D_OK;
3653 }
3654
3655 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3656 {
3657     TRACE("surface %p.\n", surface);
3658
3659     if (!(surface->flags & SFLAG_LOCKED))
3660     {
3661         WARN("Trying to unmap unmapped surface.\n");
3662         return WINEDDERR_NOTLOCKED;
3663     }
3664     surface->flags &= ~SFLAG_LOCKED;
3665
3666     surface->surface_ops->surface_unmap(surface);
3667
3668     return WINED3D_OK;
3669 }
3670
3671 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3672         struct wined3d_mapped_rect *mapped_rect, const RECT *rect, DWORD flags)
3673 {
3674     const struct wined3d_format *format = surface->resource.format;
3675
3676     TRACE("surface %p, mapped_rect %p, rect %s, flags %#x.\n",
3677             surface, mapped_rect, wine_dbgstr_rect(rect), flags);
3678
3679     if (surface->flags & SFLAG_LOCKED)
3680     {
3681         WARN("Surface is already mapped.\n");
3682         return WINED3DERR_INVALIDCALL;
3683     }
3684     if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
3685             && rect && (rect->left || rect->top
3686             || rect->right != surface->resource.width
3687             || rect->bottom != surface->resource.height))
3688     {
3689         UINT width_mask = format->block_width - 1;
3690         UINT height_mask = format->block_height - 1;
3691
3692         if ((rect->left & width_mask) || (rect->right & width_mask)
3693                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3694         {
3695             WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3696                     wine_dbgstr_rect(rect), format->block_width, format->block_height);
3697
3698             if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3699                 return WINED3DERR_INVALIDCALL;
3700         }
3701     }
3702
3703     surface->flags |= SFLAG_LOCKED;
3704
3705     if (!(surface->flags & SFLAG_LOCKABLE))
3706         WARN("Trying to lock unlockable surface.\n");
3707
3708     /* Performance optimization: Count how often a surface is mapped, if it is
3709      * mapped regularly do not throw away the system memory copy. This avoids
3710      * the need to download the surface from OpenGL all the time. The surface
3711      * is still downloaded if the OpenGL texture is changed. */
3712     if (!(surface->flags & SFLAG_DYNLOCK))
3713     {
3714         if (++surface->lockCount > MAXLOCKCOUNT)
3715         {
3716             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3717             surface->flags |= SFLAG_DYNLOCK;
3718         }
3719     }
3720
3721     surface->surface_ops->surface_map(surface, rect, flags);
3722
3723     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3724         mapped_rect->row_pitch = surface->resource.width * format->byte_count;
3725     else
3726         mapped_rect->row_pitch = wined3d_surface_get_pitch(surface);
3727
3728     if (!rect)
3729     {
3730         mapped_rect->data = surface->resource.allocatedMemory;
3731         surface->lockedRect.left = 0;
3732         surface->lockedRect.top = 0;
3733         surface->lockedRect.right = surface->resource.width;
3734         surface->lockedRect.bottom = surface->resource.height;
3735     }
3736     else
3737     {
3738         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3739         {
3740             /* Compressed textures are block based, so calculate the offset of
3741              * the block that contains the top-left pixel of the locked rectangle. */
3742             mapped_rect->data = surface->resource.allocatedMemory
3743                     + ((rect->top / format->block_height) * mapped_rect->row_pitch)
3744                     + ((rect->left / format->block_width) * format->block_byte_count);
3745         }
3746         else
3747         {
3748             mapped_rect->data = surface->resource.allocatedMemory
3749                     + (mapped_rect->row_pitch * rect->top)
3750                     + (rect->left * format->byte_count);
3751         }
3752         surface->lockedRect.left = rect->left;
3753         surface->lockedRect.top = rect->top;
3754         surface->lockedRect.right = rect->right;
3755         surface->lockedRect.bottom = rect->bottom;
3756     }
3757
3758     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3759     TRACE("Returning memory %p, pitch %u.\n", mapped_rect->data, mapped_rect->row_pitch);
3760
3761     return WINED3D_OK;
3762 }
3763
3764 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3765 {
3766     struct wined3d_mapped_rect map;
3767     HRESULT hr;
3768
3769     TRACE("surface %p, dc %p.\n", surface, dc);
3770
3771     if (surface->flags & SFLAG_USERPTR)
3772     {
3773         ERR("Not supported on surfaces with application-provided memory.\n");
3774         return WINEDDERR_NODC;
3775     }
3776
3777     /* Give more detailed info for ddraw. */
3778     if (surface->flags & SFLAG_DCINUSE)
3779         return WINEDDERR_DCALREADYCREATED;
3780
3781     /* Can't GetDC if the surface is locked. */
3782     if (surface->flags & SFLAG_LOCKED)
3783         return WINED3DERR_INVALIDCALL;
3784
3785     /* Create a DIB section if there isn't a dc yet. */
3786     if (!surface->hDC)
3787     {
3788         if (surface->flags & SFLAG_CLIENT)
3789         {
3790             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3791             surface_release_client_storage(surface);
3792         }
3793         hr = surface_create_dib_section(surface);
3794         if (FAILED(hr))
3795             return WINED3DERR_INVALIDCALL;
3796
3797         /* Use the DIB section from now on if we are not using a PBO. */
3798         if (!(surface->flags & SFLAG_PBO))
3799             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3800     }
3801
3802     /* Map the surface. */
3803     hr = wined3d_surface_map(surface, &map, NULL, 0);
3804     if (FAILED(hr))
3805     {
3806         ERR("Map failed, hr %#x.\n", hr);
3807         return hr;
3808     }
3809
3810     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3811      * activates the allocatedMemory. */
3812     if (surface->flags & SFLAG_PBO)
3813         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3814
3815     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3816             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3817     {
3818         /* GetDC on palettized formats is unsupported in D3D9, and the method
3819          * is missing in D3D8, so this should only be used for DX <=7
3820          * surfaces (with non-device palettes). */
3821         const PALETTEENTRY *pal = NULL;
3822
3823         if (surface->palette)
3824         {
3825             pal = surface->palette->palents;
3826         }
3827         else
3828         {
3829             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3830             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3831
3832             if (dds_primary && dds_primary->palette)
3833                 pal = dds_primary->palette->palents;
3834         }
3835
3836         if (pal)
3837         {
3838             RGBQUAD col[256];
3839             unsigned int i;
3840
3841             for (i = 0; i < 256; ++i)
3842             {
3843                 col[i].rgbRed = pal[i].peRed;
3844                 col[i].rgbGreen = pal[i].peGreen;
3845                 col[i].rgbBlue = pal[i].peBlue;
3846                 col[i].rgbReserved = 0;
3847             }
3848             SetDIBColorTable(surface->hDC, 0, 256, col);
3849         }
3850     }
3851
3852     surface->flags |= SFLAG_DCINUSE;
3853
3854     *dc = surface->hDC;
3855     TRACE("Returning dc %p.\n", *dc);
3856
3857     return WINED3D_OK;
3858 }
3859
3860 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3861 {
3862     TRACE("surface %p, dc %p.\n", surface, dc);
3863
3864     if (!(surface->flags & SFLAG_DCINUSE))
3865         return WINEDDERR_NODC;
3866
3867     if (surface->hDC != dc)
3868     {
3869         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3870                 dc, surface->hDC);
3871         return WINEDDERR_NODC;
3872     }
3873
3874     /* Copy the contents of the DIB over to the PBO. */
3875     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3876         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3877
3878     /* We locked first, so unlock now. */
3879     wined3d_surface_unmap(surface);
3880
3881     surface->flags &= ~SFLAG_DCINUSE;
3882
3883     return WINED3D_OK;
3884 }
3885
3886 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3887 {
3888     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3889
3890     if (flags)
3891     {
3892         static UINT once;
3893         if (!once++)
3894             FIXME("Ignoring flags %#x.\n", flags);
3895         else
3896             WARN("Ignoring flags %#x.\n", flags);
3897     }
3898
3899     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3900     {
3901         ERR("Not supported on swapchain surfaces.\n");
3902         return WINEDDERR_NOTFLIPPABLE;
3903     }
3904
3905     /* Flipping is only supported on render targets and overlays. */
3906     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3907     {
3908         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3909         return WINEDDERR_NOTFLIPPABLE;
3910     }
3911
3912     flip_surface(surface, override);
3913
3914     /* Update overlays if they're visible. */
3915     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3916         return surface_draw_overlay(surface);
3917
3918     return WINED3D_OK;
3919 }
3920
3921 /* Do not call while under the GL lock. */
3922 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3923 {
3924     struct wined3d_device *device = surface->resource.device;
3925
3926     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3927
3928     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3929     {
3930         struct wined3d_texture *texture = surface->container.u.texture;
3931
3932         TRACE("Passing to container (%p).\n", texture);
3933         texture->texture_ops->texture_preload(texture, srgb);
3934     }
3935     else
3936     {
3937         struct wined3d_context *context;
3938
3939         TRACE("(%p) : About to load surface\n", surface);
3940
3941         /* TODO: Use already acquired context when possible. */
3942         context = context_acquire(device, NULL);
3943
3944         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3945
3946         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3947         {
3948             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3949             GLclampf tmp;
3950             tmp = 0.9f;
3951             ENTER_GL();
3952             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3953             LEAVE_GL();
3954         }
3955
3956         context_release(context);
3957     }
3958 }
3959
3960 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3961 {
3962     if (!surface->resource.allocatedMemory)
3963     {
3964         if (!surface->resource.heapMemory)
3965         {
3966             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3967                     surface->resource.size + RESOURCE_ALIGNMENT)))
3968             {
3969                 ERR("Failed to allocate memory.\n");
3970                 return FALSE;
3971             }
3972         }
3973         else if (!(surface->flags & SFLAG_CLIENT))
3974         {
3975             ERR("Surface %p has heapMemory %p and flags %#x.\n",
3976                     surface, surface->resource.heapMemory, surface->flags);
3977         }
3978
3979         surface->resource.allocatedMemory =
3980             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3981     }
3982     else
3983     {
3984         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3985     }
3986
3987     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3988
3989     return TRUE;
3990 }
3991
3992 /* Read the framebuffer back into the surface */
3993 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3994 {
3995     struct wined3d_device *device = surface->resource.device;
3996     const struct wined3d_gl_info *gl_info;
3997     struct wined3d_context *context;
3998     BYTE *mem;
3999     GLint fmt;
4000     GLint type;
4001     BYTE *row, *top, *bottom;
4002     int i;
4003     BOOL bpp;
4004     RECT local_rect;
4005     BOOL srcIsUpsideDown;
4006     GLint rowLen = 0;
4007     GLint skipPix = 0;
4008     GLint skipRow = 0;
4009
4010     context = context_acquire(device, surface);
4011     context_apply_blit_state(context, device);
4012     gl_info = context->gl_info;
4013
4014     ENTER_GL();
4015
4016     /* Select the correct read buffer, and give some debug output.
4017      * There is no need to keep track of the current read buffer or reset it, every part of the code
4018      * that reads sets the read buffer as desired.
4019      */
4020     if (surface_is_offscreen(surface))
4021     {
4022         /* Mapping the primary render target which is not on a swapchain.
4023          * Read from the back buffer. */
4024         TRACE("Mapping offscreen render target.\n");
4025         glReadBuffer(device->offscreenBuffer);
4026         srcIsUpsideDown = TRUE;
4027     }
4028     else
4029     {
4030         /* Onscreen surfaces are always part of a swapchain */
4031         GLenum buffer = surface_get_gl_buffer(surface);
4032         TRACE("Mapping %#x buffer.\n", buffer);
4033         glReadBuffer(buffer);
4034         checkGLcall("glReadBuffer");
4035         srcIsUpsideDown = FALSE;
4036     }
4037
4038     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4039     if (!rect)
4040     {
4041         local_rect.left = 0;
4042         local_rect.top = 0;
4043         local_rect.right = surface->resource.width;
4044         local_rect.bottom = surface->resource.height;
4045     }
4046     else
4047     {
4048         local_rect = *rect;
4049     }
4050     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4051
4052     switch (surface->resource.format->id)
4053     {
4054         case WINED3DFMT_P8_UINT:
4055         {
4056             if (primary_render_target_is_p8(device))
4057             {
4058                 /* In case of P8 render targets the index is stored in the alpha component */
4059                 fmt = GL_ALPHA;
4060                 type = GL_UNSIGNED_BYTE;
4061                 mem = dest;
4062                 bpp = surface->resource.format->byte_count;
4063             }
4064             else
4065             {
4066                 /* GL can't return palettized data, so read ARGB pixels into a
4067                  * separate block of memory and convert them into palettized format
4068                  * in software. Slow, but if the app means to use palettized render
4069                  * targets and locks it...
4070                  *
4071                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4072                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4073                  * for the color channels when palettizing the colors.
4074                  */
4075                 fmt = GL_RGB;
4076                 type = GL_UNSIGNED_BYTE;
4077                 pitch *= 3;
4078                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4079                 if (!mem)
4080                 {
4081                     ERR("Out of memory\n");
4082                     LEAVE_GL();
4083                     return;
4084                 }
4085                 bpp = surface->resource.format->byte_count * 3;
4086             }
4087         }
4088         break;
4089
4090         default:
4091             mem = dest;
4092             fmt = surface->resource.format->glFormat;
4093             type = surface->resource.format->glType;
4094             bpp = surface->resource.format->byte_count;
4095     }
4096
4097     if (surface->flags & SFLAG_PBO)
4098     {
4099         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4100         checkGLcall("glBindBufferARB");
4101         if (mem)
4102         {
4103             ERR("mem not null for pbo -- unexpected\n");
4104             mem = NULL;
4105         }
4106     }
4107
4108     /* Save old pixel store pack state */
4109     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4110     checkGLcall("glGetIntegerv");
4111     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4112     checkGLcall("glGetIntegerv");
4113     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4114     checkGLcall("glGetIntegerv");
4115
4116     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4117     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4118     checkGLcall("glPixelStorei");
4119     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4120     checkGLcall("glPixelStorei");
4121     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4122     checkGLcall("glPixelStorei");
4123
4124     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4125             local_rect.right - local_rect.left,
4126             local_rect.bottom - local_rect.top,
4127             fmt, type, mem);
4128     checkGLcall("glReadPixels");
4129
4130     /* Reset previous pixel store pack state */
4131     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4132     checkGLcall("glPixelStorei");
4133     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4134     checkGLcall("glPixelStorei");
4135     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4136     checkGLcall("glPixelStorei");
4137
4138     if (surface->flags & SFLAG_PBO)
4139     {
4140         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4141         checkGLcall("glBindBufferARB");
4142
4143         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4144          * to get a pointer to it and perform the flipping in software. This is a lot
4145          * faster than calling glReadPixels for each line. In case we want more speed
4146          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4147         if (!srcIsUpsideDown)
4148         {
4149             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4150             checkGLcall("glBindBufferARB");
4151
4152             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4153             checkGLcall("glMapBufferARB");
4154         }
4155     }
4156
4157     /* TODO: Merge this with the palettization loop below for P8 targets */
4158     if(!srcIsUpsideDown) {
4159         UINT len, off;
4160         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4161             Flip the lines in software */
4162         len = (local_rect.right - local_rect.left) * bpp;
4163         off = local_rect.left * bpp;
4164
4165         row = HeapAlloc(GetProcessHeap(), 0, len);
4166         if(!row) {
4167             ERR("Out of memory\n");
4168             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4169                 HeapFree(GetProcessHeap(), 0, mem);
4170             LEAVE_GL();
4171             return;
4172         }
4173
4174         top = mem + pitch * local_rect.top;
4175         bottom = mem + pitch * (local_rect.bottom - 1);
4176         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4177             memcpy(row, top + off, len);
4178             memcpy(top + off, bottom + off, len);
4179             memcpy(bottom + off, row, len);
4180             top += pitch;
4181             bottom -= pitch;
4182         }
4183         HeapFree(GetProcessHeap(), 0, row);
4184
4185         /* Unmap the temp PBO buffer */
4186         if (surface->flags & SFLAG_PBO)
4187         {
4188             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4189             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4190         }
4191     }
4192
4193     LEAVE_GL();
4194     context_release(context);
4195
4196     /* For P8 textures we need to perform an inverse palette lookup. This is
4197      * done by searching for a palette index which matches the RGB value.
4198      * Note this isn't guaranteed to work when there are multiple entries for
4199      * the same color but we have no choice. In case of P8 render targets,
4200      * the index is stored in the alpha component so no conversion is needed. */
4201     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4202     {
4203         const PALETTEENTRY *pal = NULL;
4204         DWORD width = pitch / 3;
4205         int x, y, c;
4206
4207         if (surface->palette)
4208         {
4209             pal = surface->palette->palents;
4210         }
4211         else
4212         {
4213             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4214             HeapFree(GetProcessHeap(), 0, mem);
4215             return;
4216         }
4217
4218         for(y = local_rect.top; y < local_rect.bottom; y++) {
4219             for(x = local_rect.left; x < local_rect.right; x++) {
4220                 /*                      start              lines            pixels      */
4221                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4222                 const BYTE *green = blue  + 1;
4223                 const BYTE *red = green + 1;
4224
4225                 for(c = 0; c < 256; c++) {
4226                     if(*red   == pal[c].peRed   &&
4227                        *green == pal[c].peGreen &&
4228                        *blue  == pal[c].peBlue)
4229                     {
4230                         *((BYTE *) dest + y * width + x) = c;
4231                         break;
4232                     }
4233                 }
4234             }
4235         }
4236         HeapFree(GetProcessHeap(), 0, mem);
4237     }
4238 }
4239
4240 /* Read the framebuffer contents into a texture. Note that this function
4241  * doesn't do any kind of flipping. Using this on an onscreen surface will
4242  * result in a flipped D3D texture. */
4243 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4244 {
4245     struct wined3d_device *device = surface->resource.device;
4246     struct wined3d_context *context;
4247
4248     context = context_acquire(device, surface);
4249     device_invalidate_state(device, STATE_FRAMEBUFFER);
4250
4251     surface_prepare_texture(surface, context, srgb);
4252     surface_bind_and_dirtify(surface, context, srgb);
4253
4254     TRACE("Reading back offscreen render target %p.\n", surface);
4255
4256     ENTER_GL();
4257
4258     if (surface_is_offscreen(surface))
4259         glReadBuffer(device->offscreenBuffer);
4260     else
4261         glReadBuffer(surface_get_gl_buffer(surface));
4262     checkGLcall("glReadBuffer");
4263
4264     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4265             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4266     checkGLcall("glCopyTexSubImage2D");
4267
4268     LEAVE_GL();
4269
4270     context_release(context);
4271 }
4272
4273 /* Context activation is done by the caller. */
4274 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4275         struct wined3d_context *context, BOOL srgb)
4276 {
4277     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4278     CONVERT_TYPES convert;
4279     struct wined3d_format format;
4280
4281     if (surface->flags & alloc_flag) return;
4282
4283     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4284     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4285     else surface->flags &= ~SFLAG_CONVERTED;
4286
4287     surface_bind_and_dirtify(surface, context, srgb);
4288     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4289     surface->flags |= alloc_flag;
4290 }
4291
4292 /* Context activation is done by the caller. */
4293 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4294 {
4295     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4296     {
4297         struct wined3d_texture *texture = surface->container.u.texture;
4298         UINT sub_count = texture->level_count * texture->layer_count;
4299         UINT i;
4300
4301         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4302
4303         for (i = 0; i < sub_count; ++i)
4304         {
4305             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4306             surface_prepare_texture_internal(s, context, srgb);
4307         }
4308
4309         return;
4310     }
4311
4312     surface_prepare_texture_internal(surface, context, srgb);
4313 }
4314
4315 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4316 {
4317     if (multisample)
4318     {
4319         if (surface->rb_multisample)
4320             return;
4321
4322         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4323         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4324         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4325                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4326         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4327     }
4328     else
4329     {
4330         if (surface->rb_resolved)
4331             return;
4332
4333         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4334         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4335         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4336                 surface->pow2Width, surface->pow2Height);
4337         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4338     }
4339 }
4340
4341 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4342         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4343 {
4344     struct wined3d_device *device = surface->resource.device;
4345     UINT pitch = wined3d_surface_get_pitch(surface);
4346     const struct wined3d_gl_info *gl_info;
4347     struct wined3d_context *context;
4348     RECT local_rect;
4349     UINT w, h;
4350
4351     surface_get_rect(surface, rect, &local_rect);
4352
4353     mem += local_rect.top * pitch + local_rect.left * bpp;
4354     w = local_rect.right - local_rect.left;
4355     h = local_rect.bottom - local_rect.top;
4356
4357     /* Activate the correct context for the render target */
4358     context = context_acquire(device, surface);
4359     context_apply_blit_state(context, device);
4360     gl_info = context->gl_info;
4361
4362     ENTER_GL();
4363
4364     if (!surface_is_offscreen(surface))
4365     {
4366         GLenum buffer = surface_get_gl_buffer(surface);
4367         TRACE("Unlocking %#x buffer.\n", buffer);
4368         context_set_draw_buffer(context, buffer);
4369
4370         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4371         glPixelZoom(1.0f, -1.0f);
4372     }
4373     else
4374     {
4375         /* Primary offscreen render target */
4376         TRACE("Offscreen render target.\n");
4377         context_set_draw_buffer(context, device->offscreenBuffer);
4378
4379         glPixelZoom(1.0f, 1.0f);
4380     }
4381
4382     glRasterPos3i(local_rect.left, local_rect.top, 1);
4383     checkGLcall("glRasterPos3i");
4384
4385     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4386     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4387
4388     if (surface->flags & SFLAG_PBO)
4389     {
4390         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4391         checkGLcall("glBindBufferARB");
4392     }
4393
4394     glDrawPixels(w, h, fmt, type, mem);
4395     checkGLcall("glDrawPixels");
4396
4397     if (surface->flags & SFLAG_PBO)
4398     {
4399         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4400         checkGLcall("glBindBufferARB");
4401     }
4402
4403     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4404     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4405
4406     LEAVE_GL();
4407
4408     if (wined3d_settings.strict_draw_ordering
4409             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4410             && surface->container.u.swapchain->front_buffer == surface))
4411         wglFlush();
4412
4413     context_release(context);
4414 }
4415
4416 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4417         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4418 {
4419     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4420     const struct wined3d_device *device = surface->resource.device;
4421     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4422     BOOL blit_supported = FALSE;
4423
4424     /* Copy the default values from the surface. Below we might perform fixups */
4425     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4426     *format = *surface->resource.format;
4427     *convert = NO_CONVERSION;
4428
4429     /* Ok, now look if we have to do any conversion */
4430     switch (surface->resource.format->id)
4431     {
4432         case WINED3DFMT_P8_UINT:
4433             /* Below the call to blit_supported is disabled for Wine 1.2
4434              * because the function isn't operating correctly yet. At the
4435              * moment 8-bit blits are handled in software and if certain GL
4436              * extensions are around, surface conversion is performed at
4437              * upload time. The blit_supported call recognizes it as a
4438              * destination fixup. This type of upload 'fixup' and 8-bit to
4439              * 8-bit blits need to be handled by the blit_shader.
4440              * TODO: get rid of this #if 0. */
4441 #if 0
4442             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4443                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4444                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4445 #endif
4446             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4447
4448             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4449              * texturing. Further also use conversion in case of color keying.
4450              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4451              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4452              * conflicts with this.
4453              */
4454             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4455                     || colorkey_active || !use_texturing)
4456             {
4457                 format->glFormat = GL_RGBA;
4458                 format->glInternal = GL_RGBA;
4459                 format->glType = GL_UNSIGNED_BYTE;
4460                 format->conv_byte_count = 4;
4461                 if (colorkey_active)
4462                     *convert = CONVERT_PALETTED_CK;
4463                 else
4464                     *convert = CONVERT_PALETTED;
4465             }
4466             break;
4467
4468         case WINED3DFMT_B2G3R3_UNORM:
4469             /* **********************
4470                 GL_UNSIGNED_BYTE_3_3_2
4471                 ********************** */
4472             if (colorkey_active) {
4473                 /* This texture format will never be used.. So do not care about color keying
4474                     up until the point in time it will be needed :-) */
4475                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4476             }
4477             break;
4478
4479         case WINED3DFMT_B5G6R5_UNORM:
4480             if (colorkey_active)
4481             {
4482                 *convert = CONVERT_CK_565;
4483                 format->glFormat = GL_RGBA;
4484                 format->glInternal = GL_RGB5_A1;
4485                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4486                 format->conv_byte_count = 2;
4487             }
4488             break;
4489
4490         case WINED3DFMT_B5G5R5X1_UNORM:
4491             if (colorkey_active)
4492             {
4493                 *convert = CONVERT_CK_5551;
4494                 format->glFormat = GL_BGRA;
4495                 format->glInternal = GL_RGB5_A1;
4496                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4497                 format->conv_byte_count = 2;
4498             }
4499             break;
4500
4501         case WINED3DFMT_B8G8R8_UNORM:
4502             if (colorkey_active)
4503             {
4504                 *convert = CONVERT_CK_RGB24;
4505                 format->glFormat = GL_RGBA;
4506                 format->glInternal = GL_RGBA8;
4507                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4508                 format->conv_byte_count = 4;
4509             }
4510             break;
4511
4512         case WINED3DFMT_B8G8R8X8_UNORM:
4513             if (colorkey_active)
4514             {
4515                 *convert = CONVERT_RGB32_888;
4516                 format->glFormat = GL_RGBA;
4517                 format->glInternal = GL_RGBA8;
4518                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4519                 format->conv_byte_count = 4;
4520             }
4521             break;
4522
4523         default:
4524             break;
4525     }
4526
4527     return WINED3D_OK;
4528 }
4529
4530 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4531 {
4532     /* FIXME: Is this really how color keys are supposed to work? I think it
4533      * makes more sense to compare the individual channels. */
4534     return color >= color_key->color_space_low_value
4535             && color <= color_key->color_space_high_value;
4536 }
4537
4538 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4539 {
4540     const struct wined3d_device *device = surface->resource.device;
4541     const struct wined3d_palette *pal = surface->palette;
4542     BOOL index_in_alpha = FALSE;
4543     unsigned int i;
4544
4545     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4546      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4547      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4548      * duplicate entries. Store the color key in the unused alpha component to speed the
4549      * download up and to make conversion unneeded. */
4550     index_in_alpha = primary_render_target_is_p8(device);
4551
4552     if (!pal)
4553     {
4554         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4555         if (index_in_alpha)
4556         {
4557             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4558              * there's no palette at this time. */
4559             for (i = 0; i < 256; i++) table[i][3] = i;
4560         }
4561     }
4562     else
4563     {
4564         TRACE("Using surface palette %p\n", pal);
4565         /* Get the surface's palette */
4566         for (i = 0; i < 256; ++i)
4567         {
4568             table[i][0] = pal->palents[i].peRed;
4569             table[i][1] = pal->palents[i].peGreen;
4570             table[i][2] = pal->palents[i].peBlue;
4571
4572             /* When index_in_alpha is set the palette index is stored in the
4573              * alpha component. In case of a readback we can then read
4574              * GL_ALPHA. Color keying is handled in BltOverride using a
4575              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4576              * color key itself is passed to glAlphaFunc in other cases the
4577              * alpha component of pixels that should be masked away is set to 0. */
4578             if (index_in_alpha)
4579                 table[i][3] = i;
4580             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4581                 table[i][3] = 0x00;
4582             else if (pal->flags & WINEDDPCAPS_ALPHA)
4583                 table[i][3] = pal->palents[i].peFlags;
4584             else
4585                 table[i][3] = 0xFF;
4586         }
4587     }
4588 }
4589
4590 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4591         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4592 {
4593     const BYTE *source;
4594     BYTE *dest;
4595     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4596
4597     switch (convert) {
4598         case NO_CONVERSION:
4599         {
4600             memcpy(dst, src, pitch * height);
4601             break;
4602         }
4603         case CONVERT_PALETTED:
4604         case CONVERT_PALETTED_CK:
4605         {
4606             BYTE table[256][4];
4607             unsigned int x, y;
4608
4609             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4610
4611             for (y = 0; y < height; y++)
4612             {
4613                 source = src + pitch * y;
4614                 dest = dst + outpitch * y;
4615                 /* This is an 1 bpp format, using the width here is fine */
4616                 for (x = 0; x < width; x++) {
4617                     BYTE color = *source++;
4618                     *dest++ = table[color][0];
4619                     *dest++ = table[color][1];
4620                     *dest++ = table[color][2];
4621                     *dest++ = table[color][3];
4622                 }
4623             }
4624         }
4625         break;
4626
4627         case CONVERT_CK_565:
4628         {
4629             /* Converting the 565 format in 5551 packed to emulate color-keying.
4630
4631               Note : in all these conversion, it would be best to average the averaging
4632                       pixels to get the color of the pixel that will be color-keyed to
4633                       prevent 'color bleeding'. This will be done later on if ever it is
4634                       too visible.
4635
4636               Note2: Nvidia documents say that their driver does not support alpha + color keying
4637                      on the same surface and disables color keying in such a case
4638             */
4639             unsigned int x, y;
4640             const WORD *Source;
4641             WORD *Dest;
4642
4643             TRACE("Color keyed 565\n");
4644
4645             for (y = 0; y < height; y++) {
4646                 Source = (const WORD *)(src + y * pitch);
4647                 Dest = (WORD *) (dst + y * outpitch);
4648                 for (x = 0; x < width; x++ ) {
4649                     WORD color = *Source++;
4650                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4651                     if (!color_in_range(&surface->src_blt_color_key, color))
4652                         *Dest |= 0x0001;
4653                     Dest++;
4654                 }
4655             }
4656         }
4657         break;
4658
4659         case CONVERT_CK_5551:
4660         {
4661             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4662             unsigned int x, y;
4663             const WORD *Source;
4664             WORD *Dest;
4665             TRACE("Color keyed 5551\n");
4666             for (y = 0; y < height; y++) {
4667                 Source = (const WORD *)(src + y * pitch);
4668                 Dest = (WORD *) (dst + y * outpitch);
4669                 for (x = 0; x < width; x++ ) {
4670                     WORD color = *Source++;
4671                     *Dest = color;
4672                     if (!color_in_range(&surface->src_blt_color_key, color))
4673                         *Dest |= (1 << 15);
4674                     else
4675                         *Dest &= ~(1 << 15);
4676                     Dest++;
4677                 }
4678             }
4679         }
4680         break;
4681
4682         case CONVERT_CK_RGB24:
4683         {
4684             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4685             unsigned int x, y;
4686             for (y = 0; y < height; y++)
4687             {
4688                 source = src + pitch * y;
4689                 dest = dst + outpitch * y;
4690                 for (x = 0; x < width; x++) {
4691                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4692                     DWORD dstcolor = color << 8;
4693                     if (!color_in_range(&surface->src_blt_color_key, color))
4694                         dstcolor |= 0xff;
4695                     *(DWORD*)dest = dstcolor;
4696                     source += 3;
4697                     dest += 4;
4698                 }
4699             }
4700         }
4701         break;
4702
4703         case CONVERT_RGB32_888:
4704         {
4705             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4706             unsigned int x, y;
4707             for (y = 0; y < height; y++)
4708             {
4709                 source = src + pitch * y;
4710                 dest = dst + outpitch * y;
4711                 for (x = 0; x < width; x++) {
4712                     DWORD color = 0xffffff & *(const DWORD*)source;
4713                     DWORD dstcolor = color << 8;
4714                     if (!color_in_range(&surface->src_blt_color_key, color))
4715                         dstcolor |= 0xff;
4716                     *(DWORD*)dest = dstcolor;
4717                     source += 4;
4718                     dest += 4;
4719                 }
4720             }
4721         }
4722         break;
4723
4724         default:
4725             ERR("Unsupported conversion type %#x.\n", convert);
4726     }
4727     return WINED3D_OK;
4728 }
4729
4730 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4731 {
4732     /* Flip the surface contents */
4733     /* Flip the DC */
4734     {
4735         HDC tmp;
4736         tmp = front->hDC;
4737         front->hDC = back->hDC;
4738         back->hDC = tmp;
4739     }
4740
4741     /* Flip the DIBsection */
4742     {
4743         HBITMAP tmp;
4744         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4745         tmp = front->dib.DIBsection;
4746         front->dib.DIBsection = back->dib.DIBsection;
4747         back->dib.DIBsection = tmp;
4748
4749         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4750         else front->flags &= ~SFLAG_DIBSECTION;
4751         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4752         else back->flags &= ~SFLAG_DIBSECTION;
4753     }
4754
4755     /* Flip the surface data */
4756     {
4757         void* tmp;
4758
4759         tmp = front->dib.bitmap_data;
4760         front->dib.bitmap_data = back->dib.bitmap_data;
4761         back->dib.bitmap_data = tmp;
4762
4763         tmp = front->resource.allocatedMemory;
4764         front->resource.allocatedMemory = back->resource.allocatedMemory;
4765         back->resource.allocatedMemory = tmp;
4766
4767         tmp = front->resource.heapMemory;
4768         front->resource.heapMemory = back->resource.heapMemory;
4769         back->resource.heapMemory = tmp;
4770     }
4771
4772     /* Flip the PBO */
4773     {
4774         GLuint tmp_pbo = front->pbo;
4775         front->pbo = back->pbo;
4776         back->pbo = tmp_pbo;
4777     }
4778
4779     /* Flip the opengl texture */
4780     {
4781         GLuint tmp;
4782
4783         tmp = back->texture_name;
4784         back->texture_name = front->texture_name;
4785         front->texture_name = tmp;
4786
4787         tmp = back->texture_name_srgb;
4788         back->texture_name_srgb = front->texture_name_srgb;
4789         front->texture_name_srgb = tmp;
4790
4791         tmp = back->rb_multisample;
4792         back->rb_multisample = front->rb_multisample;
4793         front->rb_multisample = tmp;
4794
4795         tmp = back->rb_resolved;
4796         back->rb_resolved = front->rb_resolved;
4797         front->rb_resolved = tmp;
4798
4799         resource_unload(&back->resource);
4800         resource_unload(&front->resource);
4801     }
4802
4803     {
4804         DWORD tmp_flags = back->flags;
4805         back->flags = front->flags;
4806         front->flags = tmp_flags;
4807     }
4808 }
4809
4810 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4811  * pixel copy calls. */
4812 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4813         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4814 {
4815     struct wined3d_device *device = dst_surface->resource.device;
4816     float xrel, yrel;
4817     UINT row;
4818     struct wined3d_context *context;
4819     BOOL upsidedown = FALSE;
4820     RECT dst_rect = *dst_rect_in;
4821
4822     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4823      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4824      */
4825     if(dst_rect.top > dst_rect.bottom) {
4826         UINT tmp = dst_rect.bottom;
4827         dst_rect.bottom = dst_rect.top;
4828         dst_rect.top = tmp;
4829         upsidedown = TRUE;
4830     }
4831
4832     context = context_acquire(device, src_surface);
4833     context_apply_blit_state(context, device);
4834     surface_internal_preload(dst_surface, SRGB_RGB);
4835     ENTER_GL();
4836
4837     /* Bind the target texture */
4838     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4839     if (surface_is_offscreen(src_surface))
4840     {
4841         TRACE("Reading from an offscreen target\n");
4842         upsidedown = !upsidedown;
4843         glReadBuffer(device->offscreenBuffer);
4844     }
4845     else
4846     {
4847         glReadBuffer(surface_get_gl_buffer(src_surface));
4848     }
4849     checkGLcall("glReadBuffer");
4850
4851     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4852     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4853
4854     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4855     {
4856         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4857
4858         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4859             ERR("Texture filtering not supported in direct blit\n");
4860         }
4861     }
4862     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4863             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4864     {
4865         ERR("Texture filtering not supported in direct blit\n");
4866     }
4867
4868     if (upsidedown
4869             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4870             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4871     {
4872         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4873
4874         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4875                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4876                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4877                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4878     }
4879     else
4880     {
4881         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4882         /* I have to process this row by row to swap the image,
4883          * otherwise it would be upside down, so stretching in y direction
4884          * doesn't cost extra time
4885          *
4886          * However, stretching in x direction can be avoided if not necessary
4887          */
4888         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4889             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4890             {
4891                 /* Well, that stuff works, but it's very slow.
4892                  * find a better way instead
4893                  */
4894                 UINT col;
4895
4896                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4897                 {
4898                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4899                             dst_rect.left + col /* x offset */, row /* y offset */,
4900                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4901                 }
4902             }
4903             else
4904             {
4905                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4906                         dst_rect.left /* x offset */, row /* y offset */,
4907                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4908             }
4909         }
4910     }
4911     checkGLcall("glCopyTexSubImage2D");
4912
4913     LEAVE_GL();
4914     context_release(context);
4915
4916     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4917      * path is never entered
4918      */
4919     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4920 }
4921
/* Uses the hardware to stretch and flip the image.
 *
 * The source frame buffer contents are copied into a texture, that texture is
 * drawn as a quad of the destination size at the origin of a scratch draw
 * buffer (GL_AUX1, GL_AUX0 or GL_BACK), and the result is read back into the
 * destination texture with glCopyTexSubImage2D(). When GL_BACK had to be used
 * as scratch buffer, a full-size quad is drawn over it again afterwards from
 * the texture the frame buffer was copied into.
 *
 * dst_surface: surface whose texture receives the result
 * src_surface: surface whose GL read buffer is the source
 * src_rect:    source rectangle
 * dst_rect_in: destination rectangle; top > bottom requests a flipped copy
 * Filter:      filter applied to the source texture while drawing the quad */
static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
        const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
{
    struct wined3d_device *device = dst_surface->resource.device;
    struct wined3d_swapchain *src_swapchain = NULL;
    GLuint src, backup = 0;
    float left, right, top, bottom; /* Texture coordinates */
    UINT fbwidth = src_surface->resource.width;
    UINT fbheight = src_surface->resource.height;
    struct wined3d_context *context;
    GLenum drawBuffer = GL_BACK;
    GLenum texture_target;
    BOOL noBackBufferBackup;
    BOOL src_offscreen;
    BOOL upsidedown = FALSE;
    RECT dst_rect = *dst_rect_in;

    TRACE("Using hwstretch blit\n");
    /* Activate the Proper context for reading from the source surface, set it up for blitting */
    context = context_acquire(device, src_surface);
    context_apply_blit_state(context, device);
    surface_internal_preload(dst_surface, SRGB_RGB);

    src_offscreen = surface_is_offscreen(src_surface);
    /* With an offscreen source in FBO mode a temporary texture ("backup") is
     * used instead of the source surface's own texture. */
    noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
    if (!noBackBufferBackup && !src_surface->texture_name)
    {
        /* The source surface's texture is used below; preload the surface
         * when it has no texture name yet. */
        surface_internal_preload(src_surface, SRGB_RGB);
    }
    ENTER_GL();

    /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
     * This way we don't have to wait for the 2nd readback to finish to leave this function.
     */
    if (context->aux_buffers >= 2)
    {
        /* Got more than one aux buffer? Use the 2nd aux buffer */
        drawBuffer = GL_AUX1;
    }
    else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
    {
        /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
        drawBuffer = GL_AUX0;
    }

    if(noBackBufferBackup) {
        /* Temporary texture; deleted again in the cleanup section below. */
        glGenTextures(1, &backup);
        checkGLcall("glGenTextures");
        context_bind_texture(context, GL_TEXTURE_2D, backup);
        texture_target = GL_TEXTURE_2D;
    } else {
        /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
         * we are reading from the back buffer, the backup can be used as source texture
         */
        texture_target = src_surface->texture_target;
        context_bind_texture(context, texture_target, src_surface->texture_name);
        glEnable(texture_target);
        checkGLcall("glEnable(texture_target)");

        /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
        src_surface->flags &= ~SFLAG_INTEXTURE;
    }

    /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
     * glCopyTexSubImage is a bit picky about the parameters we pass to it
     */
    if(dst_rect.top > dst_rect.bottom) {
        UINT tmp = dst_rect.bottom;
        dst_rect.bottom = dst_rect.top;
        dst_rect.top = tmp;
        upsidedown = TRUE;
    }

    if (src_offscreen)
    {
        TRACE("Reading from an offscreen target\n");
        upsidedown = !upsidedown;
        glReadBuffer(device->offscreenBuffer);
    }
    else
    {
        glReadBuffer(surface_get_gl_buffer(src_surface));
    }

    /* Copy the whole read buffer into the currently bound texture. */
    /* TODO: Only back up the part that will be overwritten */
    glCopyTexSubImage2D(texture_target, 0,
                        0, 0 /* read offsets */,
                        0, 0,
                        fbwidth,
                        fbheight);

    checkGLcall("glCopyTexSubImage2D");

    /* No issue with overriding these - the sampler is dirty due to blit usage */
    glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
            wined3d_gl_mag_filter(magLookup, Filter));
    checkGLcall("glTexParameteri");
    glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
            wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
    checkGLcall("glTexParameteri");

    /* Pick the texture the quad is drawn from: the copy made above when the
     * source is not part of a swapchain or is back buffer 0, otherwise a
     * fresh texture filled from GL_FRONT. */
    if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
        src_swapchain = src_surface->container.u.swapchain;
    if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
    {
        src = backup ? backup : src_surface->texture_name;
    }
    else
    {
        glReadBuffer(GL_FRONT);
        checkGLcall("glReadBuffer(GL_FRONT)");

        glGenTextures(1, &src);
        checkGLcall("glGenTextures(1, &src)");
        context_bind_texture(context, GL_TEXTURE_2D, src);

        /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
         * out for power of 2 sizes
         */
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
                src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
        checkGLcall("glTexImage2D");
        glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
                            0, 0 /* read offsets */,
                            0, 0,
                            fbwidth,
                            fbheight);

        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        checkGLcall("glTexParameteri");
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        checkGLcall("glTexParameteri");

        glReadBuffer(GL_BACK);
        checkGLcall("glReadBuffer(GL_BACK)");

        /* The new source texture is a GL_TEXTURE_2D; switch the enabled
         * target to match. */
        if(texture_target != GL_TEXTURE_2D) {
            glDisable(texture_target);
            glEnable(GL_TEXTURE_2D);
            texture_target = GL_TEXTURE_2D;
        }
    }
    checkGLcall("glEnd and previous");

    /* Texture coordinates of the source rectangle. The y axis is mirrored
     * against the surface height; top and bottom trade places when the
     * result has to be flipped. */
    left = src_rect->left;
    right = src_rect->right;

    if (!upsidedown)
    {
        top = src_surface->resource.height - src_rect->top;
        bottom = src_surface->resource.height - src_rect->bottom;
    }
    else
    {
        top = src_surface->resource.height - src_rect->bottom;
        bottom = src_surface->resource.height - src_rect->top;
    }

    /* Scale to the 0..1 range when the surface is flagged SFLAG_NORMCOORD. */
    if (src_surface->flags & SFLAG_NORMCOORD)
    {
        left /= src_surface->pow2Width;
        right /= src_surface->pow2Width;
        top /= src_surface->pow2Height;
        bottom /= src_surface->pow2Height;
    }

    /* draw the source texture stretched and upside down. The correct surface is bound already */
    glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
    glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);

    /* Draw into, and afterwards read back from, the scratch buffer. */
    context_set_draw_buffer(context, drawBuffer);
    glReadBuffer(drawBuffer);

    glBegin(GL_QUADS);
        /* bottom left */
        glTexCoord2f(left, bottom);
        glVertex2i(0, 0);

        /* top left */
        glTexCoord2f(left, top);
        glVertex2i(0, dst_rect.bottom - dst_rect.top);

        /* top right */
        glTexCoord2f(right, top);
        glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);

        /* bottom right */
        glTexCoord2f(right, bottom);
        glVertex2i(dst_rect.right - dst_rect.left, 0);
    glEnd();
    checkGLcall("glEnd and previous");

    if (texture_target != dst_surface->texture_target)
    {
        glDisable(texture_target);
        glEnable(dst_surface->texture_target);
        texture_target = dst_surface->texture_target;
    }

    /* Now read the stretched and upside down image into the destination texture */
    context_bind_texture(context, texture_target, dst_surface->texture_name);
    glCopyTexSubImage2D(texture_target,
                        0,
                        dst_rect.left, dst_rect.top, /* xoffset, yoffset */
                        0, 0, /* We blitted the image to the origin */
                        dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
    checkGLcall("glCopyTexSubImage2D");

    if(drawBuffer == GL_BACK) {
        /* Write the back buffer backup back */
        if(backup) {
            if(texture_target != GL_TEXTURE_2D) {
                glDisable(texture_target);
                glEnable(GL_TEXTURE_2D);
                texture_target = GL_TEXTURE_2D;
            }
            context_bind_texture(context, GL_TEXTURE_2D, backup);
        }
        else
        {
            if (texture_target != src_surface->texture_target)
            {
                glDisable(texture_target);
                glEnable(src_surface->texture_target);
                texture_target = src_surface->texture_target;
            }
            context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
        }

        /* Full frame buffer sized quad; texture coordinates cover the
         * fbwidth x fbheight part of the pow2 sized texture. */
        glBegin(GL_QUADS);
            /* top left */
            glTexCoord2f(0.0f, 0.0f);
            glVertex2i(0, fbheight);

            /* bottom left */
            glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
            glVertex2i(0, 0);

            /* bottom right */
            glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
                    (float)fbheight / (float)src_surface->pow2Height);
            glVertex2i(fbwidth, 0);

            /* top right */
            glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
            glVertex2i(fbwidth, fbheight);
        glEnd();
    }
    glDisable(texture_target);
    checkGLcall("glDisable(texture_target)");

    /* Cleanup: delete only the textures generated in this function. */
    if (src != src_surface->texture_name && src != backup)
    {
        glDeleteTextures(1, &src);
        checkGLcall("glDeleteTextures(1, &src)");
    }
    if(backup) {
        glDeleteTextures(1, &backup);
        checkGLcall("glDeleteTextures(1, &backup)");
    }

    LEAVE_GL();

    if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */

    context_release(context);

    /* The texture is now most up to date - If the surface is a render target and has a drawable, this
     * path is never entered
     */
    surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
}
5197
5198 /* Front buffer coordinates are always full screen coordinates, but our GL
5199  * drawable is limited to the window's client area. The sysmem and texture
5200  * copies do have the full screen size. Note that GL has a bottom-left
5201  * origin, while D3D has a top-left origin. */
5202 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5203 {
5204     UINT drawable_height;
5205
5206     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5207             && surface == surface->container.u.swapchain->front_buffer)
5208     {
5209         POINT offset = {0, 0};
5210         RECT windowsize;
5211
5212         ScreenToClient(window, &offset);
5213         OffsetRect(rect, offset.x, offset.y);
5214
5215         GetClientRect(window, &windowsize);
5216         drawable_height = windowsize.bottom - windowsize.top;
5217     }
5218     else
5219     {
5220         drawable_height = surface->resource.height;
5221     }
5222
5223     rect->top = drawable_height - rect->top;
5224     rect->bottom = drawable_height - rect->bottom;
5225 }
5226
5227 static void surface_blt_to_drawable(const struct wined3d_device *device,
5228         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5229         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5230         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5231 {
5232     struct wined3d_context *context;
5233     RECT src_rect, dst_rect;
5234
5235     src_rect = *src_rect_in;
5236     dst_rect = *dst_rect_in;
5237
5238     /* Make sure the surface is up-to-date. This should probably use
5239      * surface_load_location() and worry about the destination surface too,
5240      * unless we're overwriting it completely. */
5241     surface_internal_preload(src_surface, SRGB_RGB);
5242
5243     /* Activate the destination context, set it up for blitting */
5244     context = context_acquire(device, dst_surface);
5245     context_apply_blit_state(context, device);
5246
5247     if (!surface_is_offscreen(dst_surface))
5248         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5249
5250     device->blitter->set_shader(device->blit_priv, context, src_surface);
5251
5252     ENTER_GL();
5253
5254     if (color_key)
5255     {
5256         glEnable(GL_ALPHA_TEST);
5257         checkGLcall("glEnable(GL_ALPHA_TEST)");
5258
5259         /* When the primary render target uses P8, the alpha component
5260          * contains the palette index. Which means that the colorkey is one of
5261          * the palette entries. In other cases pixels that should be masked
5262          * away have alpha set to 0. */
5263         if (primary_render_target_is_p8(device))
5264             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5265         else
5266             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5267         checkGLcall("glAlphaFunc");
5268     }
5269     else
5270     {
5271         glDisable(GL_ALPHA_TEST);
5272         checkGLcall("glDisable(GL_ALPHA_TEST)");
5273     }
5274
5275     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5276
5277     if (color_key)
5278     {
5279         glDisable(GL_ALPHA_TEST);
5280         checkGLcall("glDisable(GL_ALPHA_TEST)");
5281     }
5282
5283     LEAVE_GL();
5284
5285     /* Leave the opengl state valid for blitting */
5286     device->blitter->unset_shader(context->gl_info);
5287
5288     if (wined3d_settings.strict_draw_ordering
5289             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5290             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5291         wglFlush(); /* Flush to ensure ordering across contexts. */
5292
5293     context_release(context);
5294 }
5295
5296 /* Do not call while under the GL lock. */
5297 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5298 {
5299     struct wined3d_device *device = s->resource.device;
5300     const struct blit_shader *blitter;
5301
5302     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5303             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5304     if (!blitter)
5305     {
5306         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5307         return WINED3DERR_INVALIDCALL;
5308     }
5309
5310     return blitter->color_fill(device, s, rect, color);
5311 }
5312
5313 /* Do not call while under the GL lock. */
5314 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5315         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5316         WINED3DTEXTUREFILTERTYPE Filter)
5317 {
5318     struct wined3d_device *device = dst_surface->resource.device;
5319     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5320     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5321
5322     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5323             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5324             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5325
5326     /* Get the swapchain. One of the surfaces has to be a primary surface */
5327     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5328     {
5329         WARN("Destination is in sysmem, rejecting gl blt\n");
5330         return WINED3DERR_INVALIDCALL;
5331     }
5332
5333     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5334         dstSwapchain = dst_surface->container.u.swapchain;
5335
5336     if (src_surface)
5337     {
5338         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5339         {
5340             WARN("Src is in sysmem, rejecting gl blt\n");
5341             return WINED3DERR_INVALIDCALL;
5342         }
5343
5344         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5345             srcSwapchain = src_surface->container.u.swapchain;
5346     }
5347
5348     /* Early sort out of cases where no render target is used */
5349     if (!dstSwapchain && !srcSwapchain
5350             && src_surface != device->fb.render_targets[0]
5351             && dst_surface != device->fb.render_targets[0])
5352     {
5353         TRACE("No surface is render target, not using hardware blit.\n");
5354         return WINED3DERR_INVALIDCALL;
5355     }
5356
5357     /* No destination color keying supported */
5358     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5359     {
5360         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5361         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5362         return WINED3DERR_INVALIDCALL;
5363     }
5364
5365     if (dstSwapchain && dstSwapchain == srcSwapchain)
5366     {
5367         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5368         return WINED3DERR_INVALIDCALL;
5369     }
5370
5371     if (dstSwapchain && srcSwapchain)
5372     {
5373         FIXME("Implement hardware blit between two different swapchains\n");
5374         return WINED3DERR_INVALIDCALL;
5375     }
5376
5377     if (dstSwapchain)
5378     {
5379         /* Handled with regular texture -> swapchain blit */
5380         if (src_surface == device->fb.render_targets[0])
5381             TRACE("Blit from active render target to a swapchain\n");
5382     }
5383     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5384     {
5385         FIXME("Implement blit from a swapchain to the active render target\n");
5386         return WINED3DERR_INVALIDCALL;
5387     }
5388
5389     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5390     {
5391         /* Blit from render target to texture */
5392         BOOL stretchx;
5393
5394         /* P8 read back is not implemented */
5395         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5396                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5397         {
5398             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5399             return WINED3DERR_INVALIDCALL;
5400         }
5401
5402         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5403         {
5404             TRACE("Color keying not supported by frame buffer to texture blit\n");
5405             return WINED3DERR_INVALIDCALL;
5406             /* Destination color key is checked above */
5407         }
5408
5409         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5410             stretchx = TRUE;
5411         else
5412             stretchx = FALSE;
5413
5414         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5415          * flip the image nor scale it.
5416          *
5417          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5418          * -> If the app wants a image width an unscaled width, copy it line per line
5419          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5420          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5421          *    back buffer. This is slower than reading line per line, thus not used for flipping
5422          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5423          *    pixel by pixel. */
5424         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5425                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5426         {
5427             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5428             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5429         } else {
5430             TRACE("Using hardware stretching to flip / stretch the texture\n");
5431             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5432         }
5433
5434         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5435         {
5436             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5437             dst_surface->resource.allocatedMemory = NULL;
5438             dst_surface->resource.heapMemory = NULL;
5439         }
5440         else
5441         {
5442             dst_surface->flags &= ~SFLAG_INSYSMEM;
5443         }
5444
5445         return WINED3D_OK;
5446     }
5447     else if (src_surface)
5448     {
5449         /* Blit from offscreen surface to render target */
5450         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5451         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5452
5453         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5454
5455         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5456                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5457                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5458         {
5459             FIXME("Unsupported blit operation falling back to software\n");
5460             return WINED3DERR_INVALIDCALL;
5461         }
5462
5463         /* Color keying: Check if we have to do a color keyed blt,
5464          * and if not check if a color key is activated.
5465          *
5466          * Just modify the color keying parameters in the surface and restore them afterwards
5467          * The surface keeps track of the color key last used to load the opengl surface.
5468          * PreLoad will catch the change to the flags and color key and reload if necessary.
5469          */
5470         if (flags & WINEDDBLT_KEYSRC)
5471         {
5472             /* Use color key from surface */
5473         }
5474         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5475         {
5476             /* Use color key from DDBltFx */
5477             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5478             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5479         }
5480         else
5481         {
5482             /* Do not use color key */
5483             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5484         }
5485
5486         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5487                 src_surface, src_rect, dst_surface, dst_rect);
5488
5489         /* Restore the color key parameters */
5490         src_surface->CKeyFlags = oldCKeyFlags;
5491         src_surface->src_blt_color_key = old_blt_key;
5492
5493         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5494
5495         return WINED3D_OK;
5496     }
5497
5498     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5499     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5500     return WINED3DERR_INVALIDCALL;
5501 }
5502
5503 /* GL locking is done by the caller */
5504 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5505         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5506 {
5507     struct wined3d_device *device = surface->resource.device;
5508     const struct wined3d_gl_info *gl_info = context->gl_info;
5509     GLint compare_mode = GL_NONE;
5510     struct blt_info info;
5511     GLint old_binding = 0;
5512     RECT rect;
5513
5514     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5515
5516     glDisable(GL_CULL_FACE);
5517     glDisable(GL_BLEND);
5518     glDisable(GL_ALPHA_TEST);
5519     glDisable(GL_SCISSOR_TEST);
5520     glDisable(GL_STENCIL_TEST);
5521     glEnable(GL_DEPTH_TEST);
5522     glDepthFunc(GL_ALWAYS);
5523     glDepthMask(GL_TRUE);
5524     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5525     glViewport(x, y, w, h);
5526
5527     SetRect(&rect, 0, h, w, 0);
5528     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5529     context_active_texture(context, context->gl_info, 0);
5530     glGetIntegerv(info.binding, &old_binding);
5531     glBindTexture(info.bind_target, texture);
5532     if (gl_info->supported[ARB_SHADOW])
5533     {
5534         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5535         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5536     }
5537
5538     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5539             gl_info, info.tex_type, &surface->ds_current_size);
5540
5541     glBegin(GL_TRIANGLE_STRIP);
5542     glTexCoord3fv(info.coords[0]);
5543     glVertex2f(-1.0f, -1.0f);
5544     glTexCoord3fv(info.coords[1]);
5545     glVertex2f(1.0f, -1.0f);
5546     glTexCoord3fv(info.coords[2]);
5547     glVertex2f(-1.0f, 1.0f);
5548     glTexCoord3fv(info.coords[3]);
5549     glVertex2f(1.0f, 1.0f);
5550     glEnd();
5551
5552     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5553     glBindTexture(info.bind_target, old_binding);
5554
5555     glPopAttrib();
5556
5557     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5558 }
5559
5560 void surface_modify_ds_location(struct wined3d_surface *surface,
5561         DWORD location, UINT w, UINT h)
5562 {
5563     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5564
5565     if (location & ~SFLAG_DS_LOCATIONS)
5566         FIXME("Invalid location (%#x) specified.\n", location);
5567
5568     surface->ds_current_size.cx = w;
5569     surface->ds_current_size.cy = h;
5570     surface->flags &= ~SFLAG_DS_LOCATIONS;
5571     surface->flags |= location;
5572 }
5573
5574 /* Context activation is done by the caller. */
5575 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5576 {
5577     struct wined3d_device *device = surface->resource.device;
5578     GLsizei w, h;
5579
5580     TRACE("surface %p, new location %#x.\n", surface, location);
5581
5582     /* TODO: Make this work for modes other than FBO */
5583     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5584
5585     if (!(surface->flags & location))
5586     {
5587         w = surface->ds_current_size.cx;
5588         h = surface->ds_current_size.cy;
5589         surface->ds_current_size.cx = 0;
5590         surface->ds_current_size.cy = 0;
5591     }
5592     else
5593     {
5594         w = surface->resource.width;
5595         h = surface->resource.height;
5596     }
5597
5598     if (surface->ds_current_size.cx == surface->resource.width
5599             && surface->ds_current_size.cy == surface->resource.height)
5600     {
5601         TRACE("Location (%#x) is already up to date.\n", location);
5602         return;
5603     }
5604
5605     if (surface->current_renderbuffer)
5606     {
5607         FIXME("Not supported with fixed up depth stencil.\n");
5608         return;
5609     }
5610
5611     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5612     {
5613         /* This mostly happens when a depth / stencil is used without being
5614          * cleared first. In principle we could upload from sysmem, or
5615          * explicitly clear before first usage. For the moment there don't
5616          * appear to be a lot of applications depending on this, so a FIXME
5617          * should do. */
5618         FIXME("No up to date depth stencil location.\n");
5619         surface->flags |= location;
5620         surface->ds_current_size.cx = surface->resource.width;
5621         surface->ds_current_size.cy = surface->resource.height;
5622         return;
5623     }
5624
5625     if (location == SFLAG_DS_OFFSCREEN)
5626     {
5627         GLint old_binding = 0;
5628         GLenum bind_target;
5629
5630         /* The render target is allowed to be smaller than the depth/stencil
5631          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5632          * than the offscreen surface. Don't overwrite the offscreen surface
5633          * with undefined data. */
5634         w = min(w, context->swapchain->desc.backbuffer_width);
5635         h = min(h, context->swapchain->desc.backbuffer_height);
5636
5637         TRACE("Copying onscreen depth buffer to depth texture.\n");
5638
5639         ENTER_GL();
5640
5641         if (!device->depth_blt_texture)
5642         {
5643             glGenTextures(1, &device->depth_blt_texture);
5644         }
5645
5646         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5647          * directly on the FBO texture. That's because we need to flip. */
5648         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5649                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5650         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5651         {
5652             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5653             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5654         }
5655         else
5656         {
5657             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5658             bind_target = GL_TEXTURE_2D;
5659         }
5660         glBindTexture(bind_target, device->depth_blt_texture);
5661         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5662          * internal format, because the internal format might include stencil
5663          * data. In principle we should copy stencil data as well, but unless
5664          * the driver supports stencil export it's hard to do, and doesn't
5665          * seem to be needed in practice. If the hardware doesn't support
5666          * writing stencil data, the glCopyTexImage2D() call might trigger
5667          * software fallbacks. */
5668         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5669         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5670         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5671         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5672         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5673         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5674         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5675         glBindTexture(bind_target, old_binding);
5676
5677         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5678                 NULL, surface, SFLAG_INTEXTURE);
5679         context_set_draw_buffer(context, GL_NONE);
5680         glReadBuffer(GL_NONE);
5681
5682         /* Do the actual blit */
5683         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5684         checkGLcall("depth_blt");
5685
5686         context_invalidate_state(context, STATE_FRAMEBUFFER);
5687
5688         LEAVE_GL();
5689
5690         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5691     }
5692     else if (location == SFLAG_DS_ONSCREEN)
5693     {
5694         TRACE("Copying depth texture to onscreen depth buffer.\n");
5695
5696         ENTER_GL();
5697
5698         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5699                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5700         surface_depth_blt(surface, context, surface->texture_name,
5701                 0, surface->pow2Height - h, w, h, surface->texture_target);
5702         checkGLcall("depth_blt");
5703
5704         context_invalidate_state(context, STATE_FRAMEBUFFER);
5705
5706         LEAVE_GL();
5707
5708         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5709     }
5710     else
5711     {
5712         ERR("Invalid location (%#x) specified.\n", location);
5713     }
5714
5715     surface->flags |= location;
5716     surface->ds_current_size.cx = surface->resource.width;
5717     surface->ds_current_size.cy = surface->resource.height;
5718 }
5719
5720 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5721 {
5722     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5723     struct wined3d_surface *overlay;
5724
5725     TRACE("surface %p, location %s, persistent %#x.\n",
5726             surface, debug_surflocation(location), persistent);
5727
5728     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5729             && (location & SFLAG_INDRAWABLE))
5730         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5731
5732     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5733             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5734         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5735
5736     if (persistent)
5737     {
5738         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5739                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5740         {
5741             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5742             {
5743                 TRACE("Passing to container.\n");
5744                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5745             }
5746         }
5747         surface->flags &= ~SFLAG_LOCATIONS;
5748         surface->flags |= location;
5749
5750         /* Redraw emulated overlays, if any */
5751         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5752         {
5753             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5754             {
5755                 surface_draw_overlay(overlay);
5756             }
5757         }
5758     }
5759     else
5760     {
5761         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5762         {
5763             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5764             {
5765                 TRACE("Passing to container\n");
5766                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5767             }
5768         }
5769         surface->flags &= ~location;
5770     }
5771
5772     if (!(surface->flags & SFLAG_LOCATIONS))
5773     {
5774         ERR("Surface %p does not have any up to date location.\n", surface);
5775     }
5776 }
5777
5778 static DWORD resource_access_from_location(DWORD location)
5779 {
5780     switch (location)
5781     {
5782         case SFLAG_INSYSMEM:
5783             return WINED3D_RESOURCE_ACCESS_CPU;
5784
5785         case SFLAG_INDRAWABLE:
5786         case SFLAG_INSRGBTEX:
5787         case SFLAG_INTEXTURE:
5788         case SFLAG_INRB_MULTISAMPLE:
5789         case SFLAG_INRB_RESOLVED:
5790             return WINED3D_RESOURCE_ACCESS_GPU;
5791
5792         default:
5793             FIXME("Unhandled location %#x.\n", location);
5794             return 0;
5795     }
5796 }
5797
5798 static void surface_load_sysmem(struct wined3d_surface *surface,
5799         const struct wined3d_gl_info *gl_info, const RECT *rect)
5800 {
5801     surface_prepare_system_memory(surface);
5802
5803     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5804         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5805
5806     /* Download the surface to system memory. */
5807     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5808     {
5809         struct wined3d_device *device = surface->resource.device;
5810         struct wined3d_context *context;
5811
5812         /* TODO: Use already acquired context when possible. */
5813         context = context_acquire(device, NULL);
5814
5815         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5816         surface_download_data(surface, gl_info);
5817
5818         context_release(context);
5819
5820         return;
5821     }
5822
5823     if (surface->flags & SFLAG_INDRAWABLE)
5824     {
5825         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5826                 wined3d_surface_get_pitch(surface));
5827         return;
5828     }
5829
5830     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5831             surface, surface->flags & SFLAG_LOCATIONS);
5832 }
5833
5834 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5835         const struct wined3d_gl_info *gl_info, const RECT *rect)
5836 {
5837     struct wined3d_device *device = surface->resource.device;
5838     struct wined3d_format format;
5839     CONVERT_TYPES convert;
5840     UINT byte_count;
5841     BYTE *mem;
5842
5843     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5844     {
5845         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5846         return WINED3DERR_INVALIDCALL;
5847     }
5848
5849     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5850         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5851
5852     if (surface->flags & SFLAG_INTEXTURE)
5853     {
5854         RECT r;
5855
5856         surface_get_rect(surface, rect, &r);
5857         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5858
5859         return WINED3D_OK;
5860     }
5861
5862     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5863     {
5864         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5865          * path through sysmem. */
5866         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5867     }
5868
5869     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5870
5871     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5872      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5873      * called. */
5874     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5875     {
5876         struct wined3d_context *context;
5877
5878         TRACE("Removing the pbo attached to surface %p.\n", surface);
5879
5880         /* TODO: Use already acquired context when possible. */
5881         context = context_acquire(device, NULL);
5882
5883         surface_remove_pbo(surface, gl_info);
5884
5885         context_release(context);
5886     }
5887
5888     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5889     {
5890         UINT height = surface->resource.height;
5891         UINT width = surface->resource.width;
5892         UINT src_pitch, dst_pitch;
5893
5894         byte_count = format.conv_byte_count;
5895         src_pitch = wined3d_surface_get_pitch(surface);
5896
5897         /* Stick to the alignment for the converted surface too, makes it
5898          * easier to load the surface. */
5899         dst_pitch = width * byte_count;
5900         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5901
5902         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5903         {
5904             ERR("Out of memory (%u).\n", dst_pitch * height);
5905             return E_OUTOFMEMORY;
5906         }
5907
5908         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5909                 src_pitch, width, height, dst_pitch, convert, surface);
5910
5911         surface->flags |= SFLAG_CONVERTED;
5912     }
5913     else
5914     {
5915         surface->flags &= ~SFLAG_CONVERTED;
5916         mem = surface->resource.allocatedMemory;
5917         byte_count = format.byte_count;
5918     }
5919
5920     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5921
5922     /* Don't delete PBO memory. */
5923     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5924         HeapFree(GetProcessHeap(), 0, mem);
5925
5926     return WINED3D_OK;
5927 }
5928
5929 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5930         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5931 {
5932     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5933     struct wined3d_device *device = surface->resource.device;
5934     struct wined3d_context *context;
5935     UINT width, src_pitch, dst_pitch;
5936     struct wined3d_bo_address data;
5937     struct wined3d_format format;
5938     POINT dst_point = {0, 0};
5939     CONVERT_TYPES convert;
5940     BYTE *mem;
5941
5942     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5943             && surface_is_offscreen(surface)
5944             && (surface->flags & SFLAG_INDRAWABLE))
5945     {
5946         surface_load_fb_texture(surface, srgb);
5947
5948         return WINED3D_OK;
5949     }
5950
5951     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5952             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
5953             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5954                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5955                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5956     {
5957         if (srgb)
5958             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5959                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5960         else
5961             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5962                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5963
5964         return WINED3D_OK;
5965     }
5966
5967     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
5968             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
5969             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5970                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5971                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5972     {
5973         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
5974         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
5975         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
5976
5977         surface_blt_fbo(device, WINED3DTEXF_POINT, surface, src_location,
5978                 &rect, surface, dst_location, &rect);
5979
5980         return WINED3D_OK;
5981     }
5982
5983     /* Upload from system memory */
5984
5985     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
5986             TRUE /* We will use textures */, &format, &convert);
5987
5988     if (srgb)
5989     {
5990         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
5991         {
5992             /* Performance warning... */
5993             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
5994             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5995         }
5996     }
5997     else
5998     {
5999         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6000         {
6001             /* Performance warning... */
6002             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6003             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6004         }
6005     }
6006
6007     if (!(surface->flags & SFLAG_INSYSMEM))
6008     {
6009         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6010         /* Lets hope we get it from somewhere... */
6011         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6012     }
6013
6014     /* TODO: Use already acquired context when possible. */
6015     context = context_acquire(device, NULL);
6016
6017     surface_prepare_texture(surface, context, srgb);
6018     surface_bind_and_dirtify(surface, context, srgb);
6019
6020     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6021     {
6022         surface->flags |= SFLAG_GLCKEY;
6023         surface->gl_color_key = surface->src_blt_color_key;
6024     }
6025     else surface->flags &= ~SFLAG_GLCKEY;
6026
6027     width = surface->resource.width;
6028     src_pitch = wined3d_surface_get_pitch(surface);
6029
6030     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6031      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6032      * called. */
6033     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6034     {
6035         TRACE("Removing the pbo attached to surface %p.\n", surface);
6036         surface_remove_pbo(surface, gl_info);
6037     }
6038
6039     if (format.convert)
6040     {
6041         /* This code is entered for texture formats which need a fixup. */
6042         UINT height = surface->resource.height;
6043
6044         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6045         dst_pitch = width * format.conv_byte_count;
6046         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6047
6048         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6049         {
6050             ERR("Out of memory (%u).\n", dst_pitch * height);
6051             context_release(context);
6052             return E_OUTOFMEMORY;
6053         }
6054         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6055         format.byte_count = format.conv_byte_count;
6056         src_pitch = dst_pitch;
6057     }
6058     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6059     {
6060         /* This code is only entered for color keying fixups */
6061         UINT height = surface->resource.height;
6062
6063         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6064         dst_pitch = width * format.conv_byte_count;
6065         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6066
6067         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6068         {
6069             ERR("Out of memory (%u).\n", dst_pitch * height);
6070             context_release(context);
6071             return E_OUTOFMEMORY;
6072         }
6073         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6074                 width, height, dst_pitch, convert, surface);
6075         format.byte_count = format.conv_byte_count;
6076         src_pitch = dst_pitch;
6077     }
6078     else
6079     {
6080         mem = surface->resource.allocatedMemory;
6081     }
6082
6083     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6084     data.addr = mem;
6085     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6086
6087     context_release(context);
6088
6089     /* Don't delete PBO memory. */
6090     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6091         HeapFree(GetProcessHeap(), 0, mem);
6092
6093     return WINED3D_OK;
6094 }
6095
6096 static void surface_multisample_resolve(struct wined3d_surface *surface)
6097 {
6098     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6099
6100     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6101         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6102
6103     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6104             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6105 }
6106
6107 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6108 {
6109     struct wined3d_device *device = surface->resource.device;
6110     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6111     HRESULT hr;
6112
6113     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6114
6115     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6116     {
6117         if (location == SFLAG_INTEXTURE)
6118         {
6119             struct wined3d_context *context = context_acquire(device, NULL);
6120             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6121             context_release(context);
6122             return WINED3D_OK;
6123         }
6124         else
6125         {
6126             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6127             return WINED3DERR_INVALIDCALL;
6128         }
6129     }
6130
6131     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6132         location = SFLAG_INTEXTURE;
6133
6134     if (surface->flags & location)
6135     {
6136         TRACE("Location already up to date.\n");
6137
6138         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6139                 && surface_need_pbo(surface, gl_info))
6140             surface_load_pbo(surface, gl_info);
6141
6142         return WINED3D_OK;
6143     }
6144
6145     if (WARN_ON(d3d_surface))
6146     {
6147         DWORD required_access = resource_access_from_location(location);
6148         if ((surface->resource.access_flags & required_access) != required_access)
6149             WARN("Operation requires %#x access, but surface only has %#x.\n",
6150                     required_access, surface->resource.access_flags);
6151     }
6152
6153     if (!(surface->flags & SFLAG_LOCATIONS))
6154     {
6155         ERR("Surface %p does not have any up to date location.\n", surface);
6156         surface->flags |= SFLAG_LOST;
6157         return WINED3DERR_DEVICELOST;
6158     }
6159
6160     switch (location)
6161     {
6162         case SFLAG_INSYSMEM:
6163             surface_load_sysmem(surface, gl_info, rect);
6164             break;
6165
6166         case SFLAG_INDRAWABLE:
6167             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6168                 return hr;
6169             break;
6170
6171         case SFLAG_INRB_RESOLVED:
6172             surface_multisample_resolve(surface);
6173             break;
6174
6175         case SFLAG_INTEXTURE:
6176         case SFLAG_INSRGBTEX:
6177             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6178                 return hr;
6179             break;
6180
6181         default:
6182             ERR("Don't know how to handle location %#x.\n", location);
6183             break;
6184     }
6185
6186     if (!rect)
6187     {
6188         surface->flags |= location;
6189
6190         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6191             surface_evict_sysmem(surface);
6192     }
6193
6194     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6195             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6196     {
6197         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6198     }
6199
6200     return WINED3D_OK;
6201 }
6202
6203 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6204 {
6205     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6206
6207     /* Not on a swapchain - must be offscreen */
6208     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6209
6210     /* The front buffer is always onscreen */
6211     if (surface == swapchain->front_buffer) return FALSE;
6212
6213     /* If the swapchain is rendered to an FBO, the backbuffer is
6214      * offscreen, otherwise onscreen */
6215     return swapchain->render_to_fbo;
6216 }
6217
6218 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Context activation is done by the caller. */
/* Counterpart of ffp_blit_alloc(); there is nothing to release. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6221
6222 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6223 /* Context activation is done by the caller. */
6224 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6225 {
6226     BYTE table[256][4];
6227     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6228
6229     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6230
6231     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6232     ENTER_GL();
6233     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6234     LEAVE_GL();
6235 }
6236
6237 /* Context activation is done by the caller. */
6238 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6239 {
6240     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6241
6242     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6243      * else the surface is converted in software at upload time in LoadLocation.
6244      */
6245     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6246             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6247         ffp_blit_p8_upload_palette(surface, context->gl_info);
6248
6249     ENTER_GL();
6250     glEnable(surface->texture_target);
6251     checkGLcall("glEnable(surface->texture_target)");
6252     LEAVE_GL();
6253     return WINED3D_OK;
6254 }
6255
6256 /* Context activation is done by the caller. */
6257 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6258 {
6259     ENTER_GL();
6260     glDisable(GL_TEXTURE_2D);
6261     checkGLcall("glDisable(GL_TEXTURE_2D)");
6262     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6263     {
6264         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6265         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6266     }
6267     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6268     {
6269         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6270         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6271     }
6272     LEAVE_GL();
6273 }
6274
6275 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6276         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6277         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6278 {
6279     enum complex_fixup src_fixup;
6280
6281     switch (blit_op)
6282     {
6283         case WINED3D_BLIT_OP_COLOR_BLIT:
6284             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6285                 return FALSE;
6286
6287             src_fixup = get_complex_fixup(src_format->color_fixup);
6288             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6289             {
6290                 TRACE("Checking support for fixup:\n");
6291                 dump_color_fixup_desc(src_format->color_fixup);
6292             }
6293
6294             if (!is_identity_fixup(dst_format->color_fixup))
6295             {
6296                 TRACE("Destination fixups are not supported\n");
6297                 return FALSE;
6298             }
6299
6300             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6301             {
6302                 TRACE("P8 fixup supported\n");
6303                 return TRUE;
6304             }
6305
6306             /* We only support identity conversions. */
6307             if (is_identity_fixup(src_format->color_fixup))
6308             {
6309                 TRACE("[OK]\n");
6310                 return TRUE;
6311             }
6312
6313             TRACE("[FAILED]\n");
6314             return FALSE;
6315
6316         case WINED3D_BLIT_OP_COLOR_FILL:
6317             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6318                 return FALSE;
6319
6320             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6321             {
6322                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6323                     return FALSE;
6324             }
6325             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6326             {
6327                 TRACE("Color fill not supported\n");
6328                 return FALSE;
6329             }
6330
6331             /* FIXME: We should reject color fills on formats with fixups,
6332              * but this would break P8 color fills for example. */
6333
6334             return TRUE;
6335
6336         case WINED3D_BLIT_OP_DEPTH_FILL:
6337             return TRUE;
6338
6339         default:
6340             TRACE("Unsupported blit_op=%d\n", blit_op);
6341             return FALSE;
6342     }
6343 }
6344
6345 /* Do not call while under the GL lock. */
6346 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6347         const RECT *dst_rect, const struct wined3d_color *color)
6348 {
6349     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6350     struct wined3d_fb_state fb = {&dst_surface, NULL};
6351
6352     return device_clear_render_targets(device, 1, &fb,
6353             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6354 }
6355
6356 /* Do not call while under the GL lock. */
6357 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6358         struct wined3d_surface *surface, const RECT *rect, float depth)
6359 {
6360     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6361     struct wined3d_fb_state fb = {NULL, surface};
6362
6363     return device_clear_render_targets(device, 0, &fb,
6364             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6365 }
6366
6367 const struct blit_shader ffp_blit =  {
6368     ffp_blit_alloc,
6369     ffp_blit_free,
6370     ffp_blit_set,
6371     ffp_blit_unset,
6372     ffp_blit_supported,
6373     ffp_blit_color_fill,
6374     ffp_blit_depth_fill,
6375 };
6376
6377 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6378 {
6379     return WINED3D_OK;
6380 }
6381
/* Counterpart of cpu_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Intentionally empty. */
}
6386
6387 /* Context activation is done by the caller. */
6388 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6389 {
6390     return WINED3D_OK;
6391 }
6392
/* Counterpart of cpu_blit_set(); there is no state to restore.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* Intentionally empty. */
}
6397
6398 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6399         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6400         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6401 {
6402     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6403     {
6404         return TRUE;
6405     }
6406
6407     return FALSE;
6408 }
6409
6410 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6411         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6412         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6413 {
6414     UINT row_block_count;
6415     const BYTE *src_row;
6416     BYTE *dst_row;
6417     UINT x, y;
6418
6419     src_row = src_data;
6420     dst_row = dst_data;
6421
6422     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6423
6424     if (!flags)
6425     {
6426         for (y = 0; y < update_h; y += format->block_height)
6427         {
6428             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6429             src_row += src_pitch;
6430             dst_row += dst_pitch;
6431         }
6432
6433         return WINED3D_OK;
6434     }
6435
6436     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6437     {
6438         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6439
6440         switch (format->id)
6441         {
6442             case WINED3DFMT_DXT1:
6443                 for (y = 0; y < update_h; y += format->block_height)
6444                 {
6445                     struct block
6446                     {
6447                         WORD color[2];
6448                         BYTE control_row[4];
6449                     };
6450
6451                     const struct block *s = (const struct block *)src_row;
6452                     struct block *d = (struct block *)dst_row;
6453
6454                     for (x = 0; x < row_block_count; ++x)
6455                     {
6456                         d[x].color[0] = s[x].color[0];
6457                         d[x].color[1] = s[x].color[1];
6458                         d[x].control_row[0] = s[x].control_row[3];
6459                         d[x].control_row[1] = s[x].control_row[2];
6460                         d[x].control_row[2] = s[x].control_row[1];
6461                         d[x].control_row[3] = s[x].control_row[0];
6462                     }
6463                     src_row -= src_pitch;
6464                     dst_row += dst_pitch;
6465                 }
6466                 return WINED3D_OK;
6467
6468             case WINED3DFMT_DXT3:
6469                 for (y = 0; y < update_h; y += format->block_height)
6470                 {
6471                     struct block
6472                     {
6473                         WORD alpha_row[4];
6474                         WORD color[2];
6475                         BYTE control_row[4];
6476                     };
6477
6478                     const struct block *s = (const struct block *)src_row;
6479                     struct block *d = (struct block *)dst_row;
6480
6481                     for (x = 0; x < row_block_count; ++x)
6482                     {
6483                         d[x].alpha_row[0] = s[x].alpha_row[3];
6484                         d[x].alpha_row[1] = s[x].alpha_row[2];
6485                         d[x].alpha_row[2] = s[x].alpha_row[1];
6486                         d[x].alpha_row[3] = s[x].alpha_row[0];
6487                         d[x].color[0] = s[x].color[0];
6488                         d[x].color[1] = s[x].color[1];
6489                         d[x].control_row[0] = s[x].control_row[3];
6490                         d[x].control_row[1] = s[x].control_row[2];
6491                         d[x].control_row[2] = s[x].control_row[1];
6492                         d[x].control_row[3] = s[x].control_row[0];
6493                     }
6494                     src_row -= src_pitch;
6495                     dst_row += dst_pitch;
6496                 }
6497                 return WINED3D_OK;
6498
6499             default:
6500                 FIXME("Compressed flip not implemented for format %s.\n",
6501                         debug_d3dformat(format->id));
6502                 return E_NOTIMPL;
6503         }
6504     }
6505
6506     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6507             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6508
6509     return E_NOTIMPL;
6510 }
6511
6512 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6513         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6514         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6515 {
6516     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6517     const struct wined3d_format *src_format, *dst_format;
6518     struct wined3d_surface *orig_src = src_surface;
6519     struct wined3d_mapped_rect dst_map, src_map;
6520     HRESULT hr = WINED3D_OK;
6521     const BYTE *sbuf;
6522     RECT xdst,xsrc;
6523     BYTE *dbuf;
6524     int x, y;
6525
6526     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6527             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6528             flags, fx, debug_d3dtexturefiltertype(filter));
6529
6530     xsrc = *src_rect;
6531
6532     if (!src_surface)
6533     {
6534         RECT full_rect;
6535
6536         full_rect.left = 0;
6537         full_rect.top = 0;
6538         full_rect.right = dst_surface->resource.width;
6539         full_rect.bottom = dst_surface->resource.height;
6540         IntersectRect(&xdst, &full_rect, dst_rect);
6541     }
6542     else
6543     {
6544         BOOL clip_horiz, clip_vert;
6545
6546         xdst = *dst_rect;
6547         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6548         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6549
6550         if (clip_vert || clip_horiz)
6551         {
6552             /* Now check if this is a special case or not... */
6553             if ((flags & WINEDDBLT_DDFX)
6554                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6555                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6556             {
6557                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6558                 return WINED3D_OK;
6559             }
6560
6561             if (clip_horiz)
6562             {
6563                 if (xdst.left < 0)
6564                 {
6565                     xsrc.left -= xdst.left;
6566                     xdst.left = 0;
6567                 }
6568                 if (xdst.right > dst_surface->resource.width)
6569                 {
6570                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6571                     xdst.right = (int)dst_surface->resource.width;
6572                 }
6573             }
6574
6575             if (clip_vert)
6576             {
6577                 if (xdst.top < 0)
6578                 {
6579                     xsrc.top -= xdst.top;
6580                     xdst.top = 0;
6581                 }
6582                 if (xdst.bottom > dst_surface->resource.height)
6583                 {
6584                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6585                     xdst.bottom = (int)dst_surface->resource.height;
6586                 }
6587             }
6588
6589             /* And check if after clipping something is still to be done... */
6590             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6591                     || (xdst.left >= (int)dst_surface->resource.width)
6592                     || (xdst.top >= (int)dst_surface->resource.height)
6593                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6594                     || (xsrc.left >= (int)src_surface->resource.width)
6595                     || (xsrc.top >= (int)src_surface->resource.height))
6596             {
6597                 TRACE("Nothing to be done after clipping.\n");
6598                 return WINED3D_OK;
6599             }
6600         }
6601     }
6602
6603     if (src_surface == dst_surface)
6604     {
6605         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6606         src_map = dst_map;
6607         src_format = dst_surface->resource.format;
6608         dst_format = src_format;
6609     }
6610     else
6611     {
6612         dst_format = dst_surface->resource.format;
6613         if (src_surface)
6614         {
6615             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6616             {
6617                 src_surface = surface_convert_format(src_surface, dst_format->id);
6618                 if (!src_surface)
6619                 {
6620                     /* The conv function writes a FIXME */
6621                     WARN("Cannot convert source surface format to dest format.\n");
6622                     goto release;
6623                 }
6624             }
6625             wined3d_surface_map(src_surface, &src_map, NULL, WINED3DLOCK_READONLY);
6626             src_format = src_surface->resource.format;
6627         }
6628         else
6629         {
6630             src_format = dst_format;
6631         }
6632         if (dst_rect)
6633             wined3d_surface_map(dst_surface, &dst_map, &xdst, 0);
6634         else
6635             wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6636     }
6637
6638     bpp = dst_surface->resource.format->byte_count;
6639     srcheight = xsrc.bottom - xsrc.top;
6640     srcwidth = xsrc.right - xsrc.left;
6641     dstheight = xdst.bottom - xdst.top;
6642     dstwidth = xdst.right - xdst.left;
6643     width = (xdst.right - xdst.left) * bpp;
6644
6645     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6646     {
6647         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6648
6649         if (src_surface == dst_surface)
6650         {
6651             FIXME("Only plain blits supported on compressed surfaces.\n");
6652             hr = E_NOTIMPL;
6653             goto release;
6654         }
6655
6656         if (srcheight != dstheight || srcwidth != dstwidth)
6657         {
6658             WARN("Stretching not supported on compressed surfaces.\n");
6659             hr = WINED3DERR_INVALIDCALL;
6660             goto release;
6661         }
6662
6663         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6664         {
6665             WARN("Rectangle not block-aligned.\n");
6666             hr = WINED3DERR_INVALIDCALL;
6667             goto release;
6668         }
6669
6670         hr = surface_cpu_blt_compressed(src_map.data, dst_map.data,
6671                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6672                 src_format, flags, fx);
6673         goto release;
6674     }
6675
6676     if (dst_rect && src_surface != dst_surface)
6677         dbuf = dst_map.data;
6678     else
6679         dbuf = (BYTE *)dst_map.data + (xdst.top * dst_map.row_pitch) + (xdst.left * bpp);
6680
6681     /* First, all the 'source-less' blits */
6682     if (flags & WINEDDBLT_COLORFILL)
6683     {
6684         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6685         flags &= ~WINEDDBLT_COLORFILL;
6686     }
6687
6688     if (flags & WINEDDBLT_DEPTHFILL)
6689     {
6690         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6691     }
6692     if (flags & WINEDDBLT_ROP)
6693     {
6694         /* Catch some degenerate cases here. */
6695         switch (fx->dwROP)
6696         {
6697             case BLACKNESS:
6698                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6699                 break;
6700             case 0xAA0029: /* No-op */
6701                 break;
6702             case WHITENESS:
6703                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6704                 break;
6705             case SRCCOPY: /* Well, we do that below? */
6706                 break;
6707             default:
6708                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6709                 goto error;
6710         }
6711         flags &= ~WINEDDBLT_ROP;
6712     }
6713     if (flags & WINEDDBLT_DDROPS)
6714     {
6715         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6716     }
6717     /* Now the 'with source' blits. */
6718     if (src_surface)
6719     {
6720         const BYTE *sbase;
6721         int sx, xinc, sy, yinc;
6722
6723         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6724             goto release;
6725
6726         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6727                 && (srcwidth != dstwidth || srcheight != dstheight))
6728         {
6729             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6730             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6731         }
6732
6733         sbase = (BYTE *)src_map.data + (xsrc.top * src_map.row_pitch) + xsrc.left * bpp;
6734         xinc = (srcwidth << 16) / dstwidth;
6735         yinc = (srcheight << 16) / dstheight;
6736
6737         if (!flags)
6738         {
6739             /* No effects, we can cheat here. */
6740             if (dstwidth == srcwidth)
6741             {
6742                 if (dstheight == srcheight)
6743                 {
6744                     /* No stretching in either direction. This needs to be as
6745                      * fast as possible. */
6746                     sbuf = sbase;
6747
6748                     /* Check for overlapping surfaces. */
6749                     if (src_surface != dst_surface || xdst.top < xsrc.top
6750                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6751                     {
6752                         /* No overlap, or dst above src, so copy from top downwards. */
6753                         for (y = 0; y < dstheight; ++y)
6754                         {
6755                             memcpy(dbuf, sbuf, width);
6756                             sbuf += src_map.row_pitch;
6757                             dbuf += dst_map.row_pitch;
6758                         }
6759                     }
6760                     else if (xdst.top > xsrc.top)
6761                     {
6762                         /* Copy from bottom upwards. */
6763                         sbuf += src_map.row_pitch * dstheight;
6764                         dbuf += dst_map.row_pitch * dstheight;
6765                         for (y = 0; y < dstheight; ++y)
6766                         {
6767                             sbuf -= src_map.row_pitch;
6768                             dbuf -= dst_map.row_pitch;
6769                             memcpy(dbuf, sbuf, width);
6770                         }
6771                     }
6772                     else
6773                     {
6774                         /* Src and dst overlapping on the same line, use memmove. */
6775                         for (y = 0; y < dstheight; ++y)
6776                         {
6777                             memmove(dbuf, sbuf, width);
6778                             sbuf += src_map.row_pitch;
6779                             dbuf += dst_map.row_pitch;
6780                         }
6781                     }
6782                 }
6783                 else
6784                 {
6785                     /* Stretching in y direction only. */
6786                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6787                     {
6788                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6789                         memcpy(dbuf, sbuf, width);
6790                         dbuf += dst_map.row_pitch;
6791                     }
6792                 }
6793             }
6794             else
6795             {
6796                 /* Stretching in X direction. */
6797                 int last_sy = -1;
6798                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6799                 {
6800                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6801
6802                     if ((sy >> 16) == (last_sy >> 16))
6803                     {
6804                         /* This source row is the same as last source row -
6805                          * Copy the already stretched row. */
6806                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6807                     }
6808                     else
6809                     {
6810 #define STRETCH_ROW(type) \
6811 do { \
6812     const type *s = (const type *)sbuf; \
6813     type *d = (type *)dbuf; \
6814     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6815         d[x] = s[sx >> 16]; \
6816 } while(0)
6817
6818                         switch(bpp)
6819                         {
6820                             case 1:
6821                                 STRETCH_ROW(BYTE);
6822                                 break;
6823                             case 2:
6824                                 STRETCH_ROW(WORD);
6825                                 break;
6826                             case 4:
6827                                 STRETCH_ROW(DWORD);
6828                                 break;
6829                             case 3:
6830                             {
6831                                 const BYTE *s;
6832                                 BYTE *d = dbuf;
6833                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6834                                 {
6835                                     DWORD pixel;
6836
6837                                     s = sbuf + 3 * (sx >> 16);
6838                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6839                                     d[0] = (pixel      ) & 0xff;
6840                                     d[1] = (pixel >>  8) & 0xff;
6841                                     d[2] = (pixel >> 16) & 0xff;
6842                                     d += 3;
6843                                 }
6844                                 break;
6845                             }
6846                             default:
6847                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6848                                 hr = WINED3DERR_NOTAVAILABLE;
6849                                 goto error;
6850                         }
6851 #undef STRETCH_ROW
6852                     }
6853                     dbuf += dst_map.row_pitch;
6854                     last_sy = sy;
6855                 }
6856             }
6857         }
6858         else
6859         {
6860             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6861             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6862             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6863             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6864             {
6865                 /* The color keying flags are checked for correctness in ddraw */
6866                 if (flags & WINEDDBLT_KEYSRC)
6867                 {
6868                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6869                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6870                 }
6871                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6872                 {
6873                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6874                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6875                 }
6876
6877                 if (flags & WINEDDBLT_KEYDEST)
6878                 {
6879                     /* Destination color keys are taken from the source surface! */
6880                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6881                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6882                 }
6883                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6884                 {
6885                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6886                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6887                 }
6888
6889                 if (bpp == 1)
6890                 {
6891                     keymask = 0xff;
6892                 }
6893                 else
6894                 {
6895                     keymask = src_format->red_mask
6896                             | src_format->green_mask
6897                             | src_format->blue_mask;
6898                 }
6899                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6900             }
6901
6902             if (flags & WINEDDBLT_DDFX)
6903             {
6904                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6905                 LONG tmpxy;
6906                 dTopLeft     = dbuf;
6907                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6908                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6909                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6910
6911                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6912                 {
6913                     /* I don't think we need to do anything about this flag */
6914                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6915                 }
6916                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6917                 {
6918                     tmp          = dTopRight;
6919                     dTopRight    = dTopLeft;
6920                     dTopLeft     = tmp;
6921                     tmp          = dBottomRight;
6922                     dBottomRight = dBottomLeft;
6923                     dBottomLeft  = tmp;
6924                     dstxinc = dstxinc * -1;
6925                 }
6926                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6927                 {
6928                     tmp          = dTopLeft;
6929                     dTopLeft     = dBottomLeft;
6930                     dBottomLeft  = tmp;
6931                     tmp          = dTopRight;
6932                     dTopRight    = dBottomRight;
6933                     dBottomRight = tmp;
6934                     dstyinc = dstyinc * -1;
6935                 }
6936                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6937                 {
6938                     /* I don't think we need to do anything about this flag */
6939                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6940                 }
6941                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6942                 {
6943                     tmp          = dBottomRight;
6944                     dBottomRight = dTopLeft;
6945                     dTopLeft     = tmp;
6946                     tmp          = dBottomLeft;
6947                     dBottomLeft  = dTopRight;
6948                     dTopRight    = tmp;
6949                     dstxinc = dstxinc * -1;
6950                     dstyinc = dstyinc * -1;
6951                 }
6952                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6953                 {
6954                     tmp          = dTopLeft;
6955                     dTopLeft     = dBottomLeft;
6956                     dBottomLeft  = dBottomRight;
6957                     dBottomRight = dTopRight;
6958                     dTopRight    = tmp;
6959                     tmpxy   = dstxinc;
6960                     dstxinc = dstyinc;
6961                     dstyinc = tmpxy;
6962                     dstxinc = dstxinc * -1;
6963                 }
6964                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6965                 {
6966                     tmp          = dTopLeft;
6967                     dTopLeft     = dTopRight;
6968                     dTopRight    = dBottomRight;
6969                     dBottomRight = dBottomLeft;
6970                     dBottomLeft  = tmp;
6971                     tmpxy   = dstxinc;
6972                     dstxinc = dstyinc;
6973                     dstyinc = tmpxy;
6974                     dstyinc = dstyinc * -1;
6975                 }
6976                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6977                 {
6978                     /* I don't think we need to do anything about this flag */
6979                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6980                 }
6981                 dbuf = dTopLeft;
6982                 flags &= ~(WINEDDBLT_DDFX);
6983             }
6984
6985 #define COPY_COLORKEY_FX(type) \
6986 do { \
6987     const type *s; \
6988     type *d = (type *)dbuf, *dx, tmp; \
6989     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6990     { \
6991         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
6992         dx = d; \
6993         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6994         { \
6995             tmp = s[sx >> 16]; \
6996             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6997                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6998             { \
6999                 dx[0] = tmp; \
7000             } \
7001             dx = (type *)(((BYTE *)dx) + dstxinc); \
7002         } \
7003         d = (type *)(((BYTE *)d) + dstyinc); \
7004     } \
7005 } while(0)
7006
7007             switch (bpp)
7008             {
7009                 case 1:
7010                     COPY_COLORKEY_FX(BYTE);
7011                     break;
7012                 case 2:
7013                     COPY_COLORKEY_FX(WORD);
7014                     break;
7015                 case 4:
7016                     COPY_COLORKEY_FX(DWORD);
7017                     break;
7018                 case 3:
7019                 {
7020                     const BYTE *s;
7021                     BYTE *d = dbuf, *dx;
7022                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7023                     {
7024                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7025                         dx = d;
7026                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7027                         {
7028                             DWORD pixel, dpixel = 0;
7029                             s = sbuf + 3 * (sx>>16);
7030                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7031                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7032                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7033                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7034                             {
7035                                 dx[0] = (pixel      ) & 0xff;
7036                                 dx[1] = (pixel >>  8) & 0xff;
7037                                 dx[2] = (pixel >> 16) & 0xff;
7038                             }
7039                             dx += dstxinc;
7040                         }
7041                         d += dstyinc;
7042                     }
7043                     break;
7044                 }
7045                 default:
7046                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7047                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7048                     hr = WINED3DERR_NOTAVAILABLE;
7049                     goto error;
7050 #undef COPY_COLORKEY_FX
7051             }
7052         }
7053     }
7054
7055 error:
7056     if (flags && FIXME_ON(d3d_surface))
7057     {
7058         FIXME("\tUnsupported flags: %#x.\n", flags);
7059     }
7060
7061 release:
7062     wined3d_surface_unmap(dst_surface);
7063     if (src_surface && src_surface != dst_surface)
7064         wined3d_surface_unmap(src_surface);
7065     /* Release the converted surface, if any. */
7066     if (src_surface && src_surface != orig_src)
7067         wined3d_surface_decref(src_surface);
7068
7069     return hr;
7070 }
7071
7072 /* Do not call while under the GL lock. */
7073 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7074         const RECT *dst_rect, const struct wined3d_color *color)
7075 {
7076     static const RECT src_rect;
7077     WINEDDBLTFX BltFx;
7078
7079     memset(&BltFx, 0, sizeof(BltFx));
7080     BltFx.dwSize = sizeof(BltFx);
7081     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7082     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7083             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7084 }
7085
7086 /* Do not call while under the GL lock. */
7087 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7088         struct wined3d_surface *surface, const RECT *rect, float depth)
7089 {
7090     FIXME("Depth filling not implemented by cpu_blit.\n");
7091     return WINED3DERR_INVALIDCALL;
7092 }
7093
7094 const struct blit_shader cpu_blit =  {
7095     cpu_blit_alloc,
7096     cpu_blit_free,
7097     cpu_blit_set,
7098     cpu_blit_unset,
7099     cpu_blit_supported,
7100     cpu_blit_color_fill,
7101     cpu_blit_depth_fill,
7102 };
7103
7104 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7105         UINT width, UINT height, UINT level, WINED3DMULTISAMPLE_TYPE multisample_type,
7106         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7107         WINED3DPOOL pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7108 {
7109     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7110     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7111     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7112     unsigned int resource_size;
7113     HRESULT hr;
7114
7115     if (multisample_quality > 0)
7116     {
7117         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7118         multisample_quality = 0;
7119     }
7120
7121     /* Quick lockable sanity check.
7122      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7123      * this function is too deep to need to care about things like this.
7124      * Levels need to be checked too, since they all affect what can be done. */
7125     switch (pool)
7126     {
7127         case WINED3DPOOL_SCRATCH:
7128             if (!lockable)
7129             {
7130                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7131                         "which are mutually exclusive, setting lockable to TRUE.\n");
7132                 lockable = TRUE;
7133             }
7134             break;
7135
7136         case WINED3DPOOL_SYSTEMMEM:
7137             if (!lockable)
7138                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7139             break;
7140
7141         case WINED3DPOOL_MANAGED:
7142             if (usage & WINED3DUSAGE_DYNAMIC)
7143                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7144             break;
7145
7146         case WINED3DPOOL_DEFAULT:
7147             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7148                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7149             break;
7150
7151         default:
7152             FIXME("Unknown pool %#x.\n", pool);
7153             break;
7154     };
7155
7156     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7157         FIXME("Trying to create a render target that isn't in the default pool.\n");
7158
7159     /* FIXME: Check that the format is supported by the device. */
7160
7161     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7162     if (!resource_size)
7163         return WINED3DERR_INVALIDCALL;
7164
7165     surface->surface_type = surface_type;
7166
7167     switch (surface_type)
7168     {
7169         case SURFACE_OPENGL:
7170             surface->surface_ops = &surface_ops;
7171             break;
7172
7173         case SURFACE_GDI:
7174             surface->surface_ops = &gdi_surface_ops;
7175             break;
7176
7177         default:
7178             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7179             return WINED3DERR_INVALIDCALL;
7180     }
7181
7182     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7183             multisample_type, multisample_quality, usage, pool, width, height, 1,
7184             resource_size, parent, parent_ops, &surface_resource_ops);
7185     if (FAILED(hr))
7186     {
7187         WARN("Failed to initialize resource, returning %#x.\n", hr);
7188         return hr;
7189     }
7190
7191     /* "Standalone" surface. */
7192     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7193
7194     surface->texture_level = level;
7195     list_init(&surface->overlays);
7196
7197     /* Flags */
7198     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7199     if (flags & WINED3D_SURFACE_DISCARD)
7200         surface->flags |= SFLAG_DISCARD;
7201     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7202         surface->flags |= SFLAG_PIN_SYSMEM;
7203     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7204         surface->flags |= SFLAG_LOCKABLE;
7205     /* I'm not sure if this qualifies as a hack or as an optimization. It
7206      * seems reasonable to assume that lockable render targets will get
7207      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7208      * creation. However, the other reason we want to do this is that several
7209      * ddraw applications access surface memory while the surface isn't
7210      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7211      * future locks prevents these from crashing. */
7212     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7213         surface->flags |= SFLAG_DYNLOCK;
7214
7215     /* Mark the texture as dirty so that it gets loaded first time around. */
7216     surface_add_dirty_rect(surface, NULL);
7217     list_init(&surface->renderbuffers);
7218
7219     TRACE("surface %p, memory %p, size %u\n",
7220             surface, surface->resource.allocatedMemory, surface->resource.size);
7221
7222     /* Call the private setup routine */
7223     hr = surface->surface_ops->surface_private_setup(surface);
7224     if (FAILED(hr))
7225     {
7226         ERR("Private setup failed, returning %#x\n", hr);
7227         surface_cleanup(surface);
7228         return hr;
7229     }
7230
7231     /* Similar to lockable rendertargets above, creating the DIB section
7232      * during surface initialization prevents the sysmem pointer from changing
7233      * after a wined3d_surface_getdc() call. */
7234     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7235             && SUCCEEDED(surface_create_dib_section(surface)))
7236     {
7237         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7238         surface->resource.heapMemory = NULL;
7239         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7240     }
7241
7242     return hr;
7243 }
7244
7245 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7246         enum wined3d_format_id format_id, UINT level, DWORD usage, WINED3DPOOL pool,
7247         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7248         DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7249 {
7250     struct wined3d_surface *object;
7251     HRESULT hr;
7252
7253     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7254             device, width, height, debug_d3dformat(format_id), level);
7255     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7256             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7257     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7258
7259     if (surface_type == SURFACE_OPENGL && !device->adapter)
7260     {
7261         ERR("OpenGL surfaces are not available without OpenGL.\n");
7262         return WINED3DERR_NOTAVAILABLE;
7263     }
7264
7265     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7266     if (!object)
7267     {
7268         ERR("Failed to allocate surface memory.\n");
7269         return WINED3DERR_OUTOFVIDEOMEMORY;
7270     }
7271
7272     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7273             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7274     if (FAILED(hr))
7275     {
7276         WARN("Failed to initialize surface, returning %#x.\n", hr);
7277         HeapFree(GetProcessHeap(), 0, object);
7278         return hr;
7279     }
7280
7281     TRACE("Created surface %p.\n", object);
7282     *surface = object;
7283
7284     return hr;
7285 }