msxml3: Skip leading space characters when loading from BSTR.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
164 struct blt_info
165 {
166     GLenum binding;
167     GLenum bind_target;
168     enum tex_types tex_type;
169     GLfloat coords[4][3];
170 };
171
/* Rectangle with floating point edges: left, top, right and bottom. */
struct float_rect
{
    float l, t, r, b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, Filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380     UINT usage;
381     HDC dc;
382
383     TRACE("surface %p.\n", surface);
384
385     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
386     {
387         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
388         return WINED3DERR_INVALIDCALL;
389     }
390
391     switch (format->byte_count)
392     {
393         case 2:
394         case 4:
395             /* Allocate extra space to store the RGB bit masks. */
396             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
397             break;
398
399         case 3:
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
401             break;
402
403         default:
404             /* Allocate extra space for a palette. */
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
406                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
407             break;
408     }
409
410     if (!b_info)
411         return E_OUTOFMEMORY;
412
413     /* Some applications access the surface in via DWORDs, and do not take
414      * the necessary care at the end of the surface. So we need at least
415      * 4 extra bytes at the end of the surface. Check against the page size,
416      * if the last page used for the surface has at least 4 spare bytes we're
417      * safe, otherwise add an extra line to the DIB section. */
418     GetSystemInfo(&sysInfo);
419     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
420     {
421         extraline = 1;
422         TRACE("Adding an extra line to the DIB section.\n");
423     }
424
425     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
426     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
427     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
428     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
429     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
430             * wined3d_surface_get_pitch(surface);
431     b_info->bmiHeader.biPlanes = 1;
432     b_info->bmiHeader.biBitCount = format->byte_count * 8;
433
434     b_info->bmiHeader.biXPelsPerMeter = 0;
435     b_info->bmiHeader.biYPelsPerMeter = 0;
436     b_info->bmiHeader.biClrUsed = 0;
437     b_info->bmiHeader.biClrImportant = 0;
438
439     /* Get the bit masks */
440     masks = (DWORD *)b_info->bmiColors;
441     switch (surface->resource.format->id)
442     {
443         case WINED3DFMT_B8G8R8_UNORM:
444             usage = DIB_RGB_COLORS;
445             b_info->bmiHeader.biCompression = BI_RGB;
446             break;
447
448         case WINED3DFMT_B5G5R5X1_UNORM:
449         case WINED3DFMT_B5G5R5A1_UNORM:
450         case WINED3DFMT_B4G4R4A4_UNORM:
451         case WINED3DFMT_B4G4R4X4_UNORM:
452         case WINED3DFMT_B2G3R3_UNORM:
453         case WINED3DFMT_B2G3R3A8_UNORM:
454         case WINED3DFMT_R10G10B10A2_UNORM:
455         case WINED3DFMT_R8G8B8A8_UNORM:
456         case WINED3DFMT_R8G8B8X8_UNORM:
457         case WINED3DFMT_B10G10R10A2_UNORM:
458         case WINED3DFMT_B5G6R5_UNORM:
459         case WINED3DFMT_R16G16B16A16_UNORM:
460             usage = 0;
461             b_info->bmiHeader.biCompression = BI_BITFIELDS;
462             masks[0] = format->red_mask;
463             masks[1] = format->green_mask;
464             masks[2] = format->blue_mask;
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             usage = 0;
471             break;
472     }
473
474     if (!(dc = GetDC(0)))
475     {
476         HeapFree(GetProcessHeap(), 0, b_info);
477         return HRESULT_FROM_WIN32(GetLastError());
478     }
479
480     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
481             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
482             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
483     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
484     ReleaseDC(0, dc);
485
486     if (!surface->dib.DIBsection)
487     {
488         ERR("Failed to create DIB section.\n");
489         HeapFree(GetProcessHeap(), 0, b_info);
490         return HRESULT_FROM_WIN32(GetLastError());
491     }
492
493     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
494     /* Copy the existing surface to the dib section. */
495     if (surface->resource.allocatedMemory)
496     {
497         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
498                 surface->resource.height * wined3d_surface_get_pitch(surface));
499     }
500     else
501     {
502         /* This is to make maps read the GL texture although memory is allocated. */
503         surface->flags &= ~SFLAG_INSYSMEM;
504     }
505     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
506
507     HeapFree(GetProcessHeap(), 0, b_info);
508
509     /* Now allocate a DC. */
510     surface->hDC = CreateCompatibleDC(0);
511     SelectObject(surface->hDC, surface->dib.DIBsection);
512     TRACE("Using wined3d palette %p.\n", surface->palette);
513     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
514
515     surface->flags |= SFLAG_DIBSECTION;
516
517     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
518     surface->resource.heapMemory = NULL;
519
520     return WINED3D_OK;
521 }
522
523 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
524 {
525     if (surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
526         return FALSE;
527     if (!(surface->flags & SFLAG_DYNLOCK))
528         return FALSE;
529     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
530         return FALSE;
531     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
532         return FALSE;
533
534     return TRUE;
535 }
536
537 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
538 {
539     struct wined3d_context *context;
540     GLenum error;
541
542     context = context_acquire(surface->resource.device, NULL);
543     ENTER_GL();
544
545     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
546     error = glGetError();
547     if (!surface->pbo || error != GL_NO_ERROR)
548         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
549
550     TRACE("Binding PBO %u.\n", surface->pbo);
551
552     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
553     checkGLcall("glBindBufferARB");
554
555     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
556             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
557     checkGLcall("glBufferDataARB");
558
559     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
560     checkGLcall("glBindBufferARB");
561
562     /* We don't need the system memory anymore and we can't even use it for PBOs. */
563     if (!(surface->flags & SFLAG_CLIENT))
564     {
565         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
566         surface->resource.heapMemory = NULL;
567     }
568     surface->resource.allocatedMemory = NULL;
569     surface->flags |= SFLAG_PBO;
570     LEAVE_GL();
571     context_release(context);
572 }
573
574 static void surface_prepare_system_memory(struct wined3d_surface *surface)
575 {
576     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
577
578     TRACE("surface %p.\n", surface);
579
580     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
581         surface_load_pbo(surface, gl_info);
582     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
583     {
584         /* Whatever surface we have, make sure that there is memory allocated
585          * for the downloaded copy, or a PBO to map. */
586         if (!surface->resource.heapMemory)
587             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
588
589         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
590                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
591
592         if (surface->flags & SFLAG_INSYSMEM)
593             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
594     }
595 }
596
597 static void surface_evict_sysmem(struct wined3d_surface *surface)
598 {
599     if (surface->flags & SFLAG_DONOTFREE)
600         return;
601
602     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
603     surface->resource.allocatedMemory = NULL;
604     surface->resource.heapMemory = NULL;
605     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
606 }
607
608 /* Context activation is done by the caller. */
609 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
610         struct wined3d_context *context, BOOL srgb)
611 {
612     struct wined3d_device *device = surface->resource.device;
613     DWORD active_sampler;
614
615     /* We don't need a specific texture unit, but after binding the texture
616      * the current unit is dirty. Read the unit back instead of switching to
617      * 0, this avoids messing around with the state manager's GL states. The
618      * current texture unit should always be a valid one.
619      *
620      * To be more specific, this is tricky because we can implicitly be
621      * called from sampler() in state.c. This means we can't touch anything
622      * other than whatever happens to be the currently active texture, or we
623      * would risk marking already applied sampler states dirty again. */
624     active_sampler = device->rev_tex_unit_map[context->active_texture];
625
626     if (active_sampler != WINED3D_UNMAPPED_STAGE)
627         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
628     surface_bind(surface, context, srgb);
629 }
630
631 static void surface_force_reload(struct wined3d_surface *surface)
632 {
633     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
634 }
635
636 static void surface_release_client_storage(struct wined3d_surface *surface)
637 {
638     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
639
640     ENTER_GL();
641     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
642     if (surface->texture_name)
643     {
644         surface_bind_and_dirtify(surface, context, FALSE);
645         glTexImage2D(surface->texture_target, surface->texture_level,
646                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
647     }
648     if (surface->texture_name_srgb)
649     {
650         surface_bind_and_dirtify(surface, context, TRUE);
651         glTexImage2D(surface->texture_target, surface->texture_level,
652                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
653     }
654     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
655     LEAVE_GL();
656
657     context_release(context);
658
659     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
660     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
661     surface_force_reload(surface);
662 }
663
664 static HRESULT surface_private_setup(struct wined3d_surface *surface)
665 {
666     /* TODO: Check against the maximum texture sizes supported by the video card. */
667     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
668     unsigned int pow2Width, pow2Height;
669
670     TRACE("surface %p.\n", surface);
671
672     surface->texture_name = 0;
673     surface->texture_target = GL_TEXTURE_2D;
674
675     /* Non-power2 support */
676     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
677     {
678         pow2Width = surface->resource.width;
679         pow2Height = surface->resource.height;
680     }
681     else
682     {
683         /* Find the nearest pow2 match */
684         pow2Width = pow2Height = 1;
685         while (pow2Width < surface->resource.width)
686             pow2Width <<= 1;
687         while (pow2Height < surface->resource.height)
688             pow2Height <<= 1;
689     }
690     surface->pow2Width = pow2Width;
691     surface->pow2Height = pow2Height;
692
693     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
694     {
695         /* TODO: Add support for non power two compressed textures. */
696         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
697         {
698             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
699                   surface, surface->resource.width, surface->resource.height);
700             return WINED3DERR_NOTAVAILABLE;
701         }
702     }
703
704     if (pow2Width != surface->resource.width
705             || pow2Height != surface->resource.height)
706     {
707         surface->flags |= SFLAG_NONPOW2;
708     }
709
710     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
711             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
712     {
713         /* One of three options:
714          * 1: Do the same as we do with NPOT and scale the texture, (any
715          *    texture ops would require the texture to be scaled which is
716          *    potentially slow)
717          * 2: Set the texture to the maximum size (bad idea).
718          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
719          * 4: Create the surface, but allow it to be used only for DirectDraw
720          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
721          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
722          *    the render target. */
723         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
724         {
725             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
726             return WINED3DERR_NOTAVAILABLE;
727         }
728
729         /* We should never use this surface in combination with OpenGL! */
730         TRACE("Creating an oversized surface: %ux%u.\n",
731                 surface->pow2Width, surface->pow2Height);
732     }
733     else
734     {
735         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
736          * and EXT_PALETTED_TEXTURE is used in combination with texture
737          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
738          * EXT_PALETTED_TEXTURE doesn't work in combination with
739          * ARB_TEXTURE_RECTANGLE. */
740         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
741                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
742                 && gl_info->supported[EXT_PALETTED_TEXTURE]
743                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
744         {
745             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
746             surface->pow2Width = surface->resource.width;
747             surface->pow2Height = surface->resource.height;
748             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
749         }
750     }
751
752     switch (wined3d_settings.offscreen_rendering_mode)
753     {
754         case ORM_FBO:
755             surface->get_drawable_size = get_drawable_size_fbo;
756             break;
757
758         case ORM_BACKBUFFER:
759             surface->get_drawable_size = get_drawable_size_backbuffer;
760             break;
761
762         default:
763             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
764             return WINED3DERR_INVALIDCALL;
765     }
766
767     surface->flags |= SFLAG_INSYSMEM;
768
769     return WINED3D_OK;
770 }
771
772 static void surface_realize_palette(struct wined3d_surface *surface)
773 {
774     struct wined3d_palette *palette = surface->palette;
775
776     TRACE("surface %p.\n", surface);
777
778     if (!palette) return;
779
780     if (surface->resource.format->id == WINED3DFMT_P8_UINT
781             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
782     {
783         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
784         {
785             /* Make sure the texture is up to date. This call doesn't do
786              * anything if the texture is already up to date. */
787             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
788
789             /* We want to force a palette refresh, so mark the drawable as not being up to date */
790             if (!surface_is_offscreen(surface))
791                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
792         }
793         else
794         {
795             if (!(surface->flags & SFLAG_INSYSMEM))
796             {
797                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
798                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
799             }
800             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
801         }
802     }
803
804     if (surface->flags & SFLAG_DIBSECTION)
805     {
806         RGBQUAD col[256];
807         unsigned int i;
808
809         TRACE("Updating the DC's palette.\n");
810
811         for (i = 0; i < 256; ++i)
812         {
813             col[i].rgbRed   = palette->palents[i].peRed;
814             col[i].rgbGreen = palette->palents[i].peGreen;
815             col[i].rgbBlue  = palette->palents[i].peBlue;
816             col[i].rgbReserved = 0;
817         }
818         SetDIBColorTable(surface->hDC, 0, 256, col);
819     }
820
821     /* Propagate the changes to the drawable when we have a palette. */
822     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
823         surface_load_location(surface, surface->draw_binding, NULL);
824 }
825
826 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
827 {
828     HRESULT hr;
829
830     /* If there's no destination surface there is nothing to do. */
831     if (!surface->overlay_dest)
832         return WINED3D_OK;
833
834     /* Blt calls ModifyLocation on the dest surface, which in turn calls
835      * DrawOverlay to update the overlay. Prevent an endless recursion. */
836     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
837         return WINED3D_OK;
838
839     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
840     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
841             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
842     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
843
844     return hr;
845 }
846
847 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
848 {
849     struct wined3d_device *device = surface->resource.device;
850     const RECT *pass_rect = rect;
851
852     TRACE("surface %p, rect %s, flags %#x.\n",
853             surface, wine_dbgstr_rect(rect), flags);
854
855     if (flags & WINED3DLOCK_DISCARD)
856     {
857         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
858         surface_prepare_system_memory(surface);
859         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
860     }
861     else
862     {
863         /* surface_load_location() does not check if the rectangle specifies
864          * the full surface. Most callers don't need that, so do it here. */
865         if (rect && !rect->top && !rect->left
866                 && rect->right == surface->resource.width
867                 && rect->bottom == surface->resource.height)
868             pass_rect = NULL;
869         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
870     }
871
872     if (surface->flags & SFLAG_PBO)
873     {
874         const struct wined3d_gl_info *gl_info;
875         struct wined3d_context *context;
876
877         context = context_acquire(device, NULL);
878         gl_info = context->gl_info;
879
880         ENTER_GL();
881         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
882         checkGLcall("glBindBufferARB");
883
884         /* This shouldn't happen but could occur if some other function
885          * didn't handle the PBO properly. */
886         if (surface->resource.allocatedMemory)
887             ERR("The surface already has PBO memory allocated.\n");
888
889         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
890         checkGLcall("glMapBufferARB");
891
892         /* Make sure the PBO isn't set anymore in order not to break non-PBO
893          * calls. */
894         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
895         checkGLcall("glBindBufferARB");
896
897         LEAVE_GL();
898         context_release(context);
899     }
900
901     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
902     {
903         if (!rect)
904             surface_add_dirty_rect(surface, NULL);
905         else
906         {
907             struct wined3d_box b;
908
909             b.left = rect->left;
910             b.top = rect->top;
911             b.right = rect->right;
912             b.bottom = rect->bottom;
913             b.front = 0;
914             b.back = 1;
915             surface_add_dirty_rect(surface, &b);
916         }
917     }
918 }
919
920 static void surface_unmap(struct wined3d_surface *surface)
921 {
922     struct wined3d_device *device = surface->resource.device;
923     BOOL fullsurface;
924
925     TRACE("surface %p.\n", surface);
926
927     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
928
929     if (surface->flags & SFLAG_PBO)
930     {
931         const struct wined3d_gl_info *gl_info;
932         struct wined3d_context *context;
933
934         TRACE("Freeing PBO memory.\n");
935
936         context = context_acquire(device, NULL);
937         gl_info = context->gl_info;
938
939         ENTER_GL();
940         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
941         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
942         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
943         checkGLcall("glUnmapBufferARB");
944         LEAVE_GL();
945         context_release(context);
946
947         surface->resource.allocatedMemory = NULL;
948     }
949
950     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
951
952     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
953     {
954         TRACE("Not dirtified, nothing to do.\n");
955         goto done;
956     }
957
958     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
959             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
960     {
961         if (!surface->dirtyRect.left && !surface->dirtyRect.top
962                 && surface->dirtyRect.right == surface->resource.width
963                 && surface->dirtyRect.bottom == surface->resource.height)
964         {
965             fullsurface = TRUE;
966         }
967         else
968         {
969             /* TODO: Proper partial rectangle tracking. */
970             fullsurface = FALSE;
971             surface->flags |= SFLAG_INSYSMEM;
972         }
973
974         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
975
976         /* Partial rectangle tracking is not commonly implemented, it is only
977          * done for render targets. INSYSMEM was set before to tell
978          * surface_load_location() where to read the rectangle from.
979          * Indrawable is set because all modifications from the partial
980          * sysmem copy are written back to the drawable, thus the surface is
981          * merged again in the drawable. The sysmem copy is not fully up to
982          * date because only a subrectangle was read in Map(). */
983         if (!fullsurface)
984         {
985             surface_modify_location(surface, surface->draw_binding, TRUE);
986             surface_evict_sysmem(surface);
987         }
988
989         surface->dirtyRect.left = surface->resource.width;
990         surface->dirtyRect.top = surface->resource.height;
991         surface->dirtyRect.right = 0;
992         surface->dirtyRect.bottom = 0;
993     }
994     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
995     {
996         FIXME("Depth / stencil buffer locking is not implemented.\n");
997     }
998
999 done:
1000     /* Overlays have to be redrawn manually after changes with the GL implementation */
1001     if (surface->overlay_dest)
1002         surface_draw_overlay(surface);
1003 }
1004
1005 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1006 {
1007     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1008         return FALSE;
1009     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1010         return FALSE;
1011     return TRUE;
1012 }
1013
1014 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1015         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1016 {
1017     const struct wined3d_gl_info *gl_info;
1018     struct wined3d_context *context;
1019     DWORD src_mask, dst_mask;
1020     GLbitfield gl_mask;
1021
1022     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1023             device, src_surface, wine_dbgstr_rect(src_rect),
1024             dst_surface, wine_dbgstr_rect(dst_rect));
1025
1026     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1027     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1028
1029     if (src_mask != dst_mask)
1030     {
1031         ERR("Incompatible formats %s and %s.\n",
1032                 debug_d3dformat(src_surface->resource.format->id),
1033                 debug_d3dformat(dst_surface->resource.format->id));
1034         return;
1035     }
1036
1037     if (!src_mask)
1038     {
1039         ERR("Not a depth / stencil format: %s.\n",
1040                 debug_d3dformat(src_surface->resource.format->id));
1041         return;
1042     }
1043
1044     gl_mask = 0;
1045     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1046         gl_mask |= GL_DEPTH_BUFFER_BIT;
1047     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1048         gl_mask |= GL_STENCIL_BUFFER_BIT;
1049
1050     /* Make sure the locations are up-to-date. Loading the destination
1051      * surface isn't required if the entire surface is overwritten. */
1052     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1053     if (!surface_is_full_rect(dst_surface, dst_rect))
1054         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1055
1056     context = context_acquire(device, NULL);
1057     if (!context->valid)
1058     {
1059         context_release(context);
1060         WARN("Invalid context, skipping blit.\n");
1061         return;
1062     }
1063
1064     gl_info = context->gl_info;
1065
1066     ENTER_GL();
1067
1068     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1069     glReadBuffer(GL_NONE);
1070     checkGLcall("glReadBuffer()");
1071     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1072
1073     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1074     context_set_draw_buffer(context, GL_NONE);
1075     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1076
1077     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1078     {
1079         glDepthMask(GL_TRUE);
1080         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1081     }
1082     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1083     {
1084         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1085         {
1086             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1087             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1088         }
1089         glStencilMask(~0U);
1090         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1091     }
1092
1093     glDisable(GL_SCISSOR_TEST);
1094     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1095
1096     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1097             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1098     checkGLcall("glBlitFramebuffer()");
1099
1100     LEAVE_GL();
1101
1102     if (wined3d_settings.strict_draw_ordering)
1103         wglFlush(); /* Flush to ensure ordering across contexts. */
1104
1105     context_release(context);
1106 }
1107
1108 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1109  * Depth / stencil is not supported. */
1110 static void surface_blt_fbo(const struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1111         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1112         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1113 {
1114     const struct wined3d_gl_info *gl_info;
1115     struct wined3d_context *context;
1116     RECT src_rect, dst_rect;
1117     GLenum gl_filter;
1118     GLenum buffer;
1119
1120     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1121     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1122             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1123     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1124             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1125
1126     src_rect = *src_rect_in;
1127     dst_rect = *dst_rect_in;
1128
1129     switch (filter)
1130     {
1131         case WINED3DTEXF_LINEAR:
1132             gl_filter = GL_LINEAR;
1133             break;
1134
1135         default:
1136             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1137         case WINED3DTEXF_NONE:
1138         case WINED3DTEXF_POINT:
1139             gl_filter = GL_NEAREST;
1140             break;
1141     }
1142
1143     /* Resolve the source surface first if needed. */
1144     if (src_location == SFLAG_INRB_MULTISAMPLE
1145             && (src_surface->resource.format->id != dst_surface->resource.format->id
1146                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1147                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1148         src_location = SFLAG_INRB_RESOLVED;
1149
1150     /* Make sure the locations are up-to-date. Loading the destination
1151      * surface isn't required if the entire surface is overwritten. (And is
1152      * in fact harmful if we're being called by surface_load_location() with
1153      * the purpose of loading the destination surface.) */
1154     surface_load_location(src_surface, src_location, NULL);
1155     if (!surface_is_full_rect(dst_surface, &dst_rect))
1156         surface_load_location(dst_surface, dst_location, NULL);
1157
1158     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1159     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1160     else context = context_acquire(device, NULL);
1161
1162     if (!context->valid)
1163     {
1164         context_release(context);
1165         WARN("Invalid context, skipping blit.\n");
1166         return;
1167     }
1168
1169     gl_info = context->gl_info;
1170
1171     if (src_location == SFLAG_INDRAWABLE)
1172     {
1173         TRACE("Source surface %p is onscreen.\n", src_surface);
1174         buffer = surface_get_gl_buffer(src_surface);
1175         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1176     }
1177     else
1178     {
1179         TRACE("Source surface %p is offscreen.\n", src_surface);
1180         buffer = GL_COLOR_ATTACHMENT0;
1181     }
1182
1183     ENTER_GL();
1184     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1185     glReadBuffer(buffer);
1186     checkGLcall("glReadBuffer()");
1187     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1188     LEAVE_GL();
1189
1190     if (dst_location == SFLAG_INDRAWABLE)
1191     {
1192         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1193         buffer = surface_get_gl_buffer(dst_surface);
1194         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1195     }
1196     else
1197     {
1198         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1199         buffer = GL_COLOR_ATTACHMENT0;
1200     }
1201
1202     ENTER_GL();
1203     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1204     context_set_draw_buffer(context, buffer);
1205     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1206     context_invalidate_state(context, STATE_FRAMEBUFFER);
1207
1208     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1209     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1210     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1211     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1212     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1213
1214     glDisable(GL_SCISSOR_TEST);
1215     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1216
1217     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1218             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1219     checkGLcall("glBlitFramebuffer()");
1220
1221     LEAVE_GL();
1222
1223     if (wined3d_settings.strict_draw_ordering
1224             || (dst_location == SFLAG_INDRAWABLE
1225             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1226         wglFlush();
1227
1228     context_release(context);
1229 }
1230
1231 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1232         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1233         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1234 {
1235     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1236         return FALSE;
1237
1238     /* Source and/or destination need to be on the GL side */
1239     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1240         return FALSE;
1241
1242     switch (blit_op)
1243     {
1244         case WINED3D_BLIT_OP_COLOR_BLIT:
1245             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1246                 return FALSE;
1247             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1248                 return FALSE;
1249             break;
1250
1251         case WINED3D_BLIT_OP_DEPTH_BLIT:
1252             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1253                 return FALSE;
1254             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1255                 return FALSE;
1256             break;
1257
1258         default:
1259             return FALSE;
1260     }
1261
1262     if (!(src_format->id == dst_format->id
1263             || (is_identity_fixup(src_format->color_fixup)
1264             && is_identity_fixup(dst_format->color_fixup))))
1265         return FALSE;
1266
1267     return TRUE;
1268 }
1269
1270 /* This function checks if the primary render target uses the 8bit paletted format. */
1271 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1272 {
1273     if (device->fb.render_targets && device->fb.render_targets[0])
1274     {
1275         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1276         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1277                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1278             return TRUE;
1279     }
1280     return FALSE;
1281 }
1282
1283 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1284         DWORD color, struct wined3d_color *float_color)
1285 {
1286     const struct wined3d_format *format = surface->resource.format;
1287     const struct wined3d_device *device = surface->resource.device;
1288
1289     switch (format->id)
1290     {
1291         case WINED3DFMT_P8_UINT:
1292             if (surface->palette)
1293             {
1294                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1295                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1296                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1297             }
1298             else
1299             {
1300                 float_color->r = 0.0f;
1301                 float_color->g = 0.0f;
1302                 float_color->b = 0.0f;
1303             }
1304             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1305             break;
1306
1307         case WINED3DFMT_B5G6R5_UNORM:
1308             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1309             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1310             float_color->b = (color & 0x1f) / 31.0f;
1311             float_color->a = 1.0f;
1312             break;
1313
1314         case WINED3DFMT_B8G8R8_UNORM:
1315         case WINED3DFMT_B8G8R8X8_UNORM:
1316             float_color->r = D3DCOLOR_R(color);
1317             float_color->g = D3DCOLOR_G(color);
1318             float_color->b = D3DCOLOR_B(color);
1319             float_color->a = 1.0f;
1320             break;
1321
1322         case WINED3DFMT_B8G8R8A8_UNORM:
1323             float_color->r = D3DCOLOR_R(color);
1324             float_color->g = D3DCOLOR_G(color);
1325             float_color->b = D3DCOLOR_B(color);
1326             float_color->a = D3DCOLOR_A(color);
1327             break;
1328
1329         default:
1330             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1331             return FALSE;
1332     }
1333
1334     return TRUE;
1335 }
1336
1337 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1338 {
1339     const struct wined3d_format *format = surface->resource.format;
1340
1341     switch (format->id)
1342     {
1343         case WINED3DFMT_S1_UINT_D15_UNORM:
1344             *float_depth = depth / (float)0x00007fff;
1345             break;
1346
1347         case WINED3DFMT_D16_UNORM:
1348             *float_depth = depth / (float)0x0000ffff;
1349             break;
1350
1351         case WINED3DFMT_D24_UNORM_S8_UINT:
1352         case WINED3DFMT_X8D24_UNORM:
1353             *float_depth = depth / (float)0x00ffffff;
1354             break;
1355
1356         case WINED3DFMT_D32_UNORM:
1357             *float_depth = depth / (float)0xffffffff;
1358             break;
1359
1360         default:
1361             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1362             return FALSE;
1363     }
1364
1365     return TRUE;
1366 }
1367
1368 /* Do not call while under the GL lock. */
1369 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1370 {
1371     const struct wined3d_resource *resource = &surface->resource;
1372     struct wined3d_device *device = resource->device;
1373     const struct blit_shader *blitter;
1374
1375     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1376             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1377     if (!blitter)
1378     {
1379         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1380         return WINED3DERR_INVALIDCALL;
1381     }
1382
1383     return blitter->depth_fill(device, surface, rect, depth);
1384 }
1385
1386 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1387         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1388 {
1389     struct wined3d_device *device = src_surface->resource.device;
1390
1391     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1392             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1393             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1394         return WINED3DERR_INVALIDCALL;
1395
1396     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1397
1398     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1399             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1400     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1401
1402     return WINED3D_OK;
1403 }
1404
1405 /* Do not call while under the GL lock. */
1406 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1407         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1408         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1409 {
1410     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1411     struct wined3d_device *device = dst_surface->resource.device;
1412     DWORD src_ds_flags, dst_ds_flags;
1413     RECT src_rect, dst_rect;
1414     BOOL scale, convert;
1415
1416     static const DWORD simple_blit = WINEDDBLT_ASYNC
1417             | WINEDDBLT_COLORFILL
1418             | WINEDDBLT_WAIT
1419             | WINEDDBLT_DEPTHFILL
1420             | WINEDDBLT_DONOTWAIT;
1421
1422     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1423             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1424             flags, fx, debug_d3dtexturefiltertype(filter));
1425     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1426
1427     if (fx)
1428     {
1429         TRACE("dwSize %#x.\n", fx->dwSize);
1430         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1431         TRACE("dwROP %#x.\n", fx->dwROP);
1432         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1433         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1434         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1435         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1436         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1437         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1438         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1439         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1440         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1441         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1442         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1443         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1444         TRACE("dwReserved %#x.\n", fx->dwReserved);
1445         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1446         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1447         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1448         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1449         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1450         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1451                 fx->ddckDestColorkey.color_space_low_value,
1452                 fx->ddckDestColorkey.color_space_high_value);
1453         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1454                 fx->ddckSrcColorkey.color_space_low_value,
1455                 fx->ddckSrcColorkey.color_space_high_value);
1456     }
1457
1458     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1459     {
1460         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1461         return WINEDDERR_SURFACEBUSY;
1462     }
1463
1464     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1465
1466     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1467             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1468             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1469             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1470             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1471     {
1472         WARN("The application gave us a bad destination rectangle.\n");
1473         return WINEDDERR_INVALIDRECT;
1474     }
1475
1476     if (src_surface)
1477     {
1478         surface_get_rect(src_surface, src_rect_in, &src_rect);
1479
1480         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1481                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1482                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1483                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1484                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1485         {
1486             WARN("Application gave us bad source rectangle for Blt.\n");
1487             return WINEDDERR_INVALIDRECT;
1488         }
1489     }
1490     else
1491     {
1492         memset(&src_rect, 0, sizeof(src_rect));
1493     }
1494
1495     if (!fx || !(fx->dwDDFX))
1496         flags &= ~WINEDDBLT_DDFX;
1497
1498     if (flags & WINEDDBLT_WAIT)
1499         flags &= ~WINEDDBLT_WAIT;
1500
1501     if (flags & WINEDDBLT_ASYNC)
1502     {
1503         static unsigned int once;
1504
1505         if (!once++)
1506             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1507         flags &= ~WINEDDBLT_ASYNC;
1508     }
1509
1510     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1511     if (flags & WINEDDBLT_DONOTWAIT)
1512     {
1513         static unsigned int once;
1514
1515         if (!once++)
1516             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1517         flags &= ~WINEDDBLT_DONOTWAIT;
1518     }
1519
1520     if (!device->d3d_initialized)
1521     {
1522         WARN("D3D not initialized, using fallback.\n");
1523         goto cpu;
1524     }
1525
1526     /* We want to avoid invalidating the sysmem location for converted
1527      * surfaces, since otherwise we'd have to convert the data back when
1528      * locking them. */
1529     if (dst_surface->flags & SFLAG_CONVERTED)
1530     {
1531         WARN("Converted surface, using CPU blit.\n");
1532         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1533     }
1534
1535     if (flags & ~simple_blit)
1536     {
1537         WARN("Using fallback for complex blit (%#x).\n", flags);
1538         goto fallback;
1539     }
1540
1541     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1542         src_swapchain = src_surface->container.u.swapchain;
1543     else
1544         src_swapchain = NULL;
1545
1546     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1547         dst_swapchain = dst_surface->container.u.swapchain;
1548     else
1549         dst_swapchain = NULL;
1550
1551     /* This isn't strictly needed. FBO blits for example could deal with
1552      * cross-swapchain blits by first downloading the source to a texture
1553      * before switching to the destination context. We just have this here to
1554      * not have to deal with the issue, since cross-swapchain blits should be
1555      * rare. */
1556     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1557     {
1558         FIXME("Using fallback for cross-swapchain blit.\n");
1559         goto fallback;
1560     }
1561
1562     scale = src_surface
1563             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1564             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1565     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1566
1567     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1568     if (src_surface)
1569         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1570     else
1571         src_ds_flags = 0;
1572
1573     if (src_ds_flags || dst_ds_flags)
1574     {
1575         if (flags & WINEDDBLT_DEPTHFILL)
1576         {
1577             float depth;
1578
1579             TRACE("Depth fill.\n");
1580
1581             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1582                 return WINED3DERR_INVALIDCALL;
1583
1584             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1585                 return WINED3D_OK;
1586         }
1587         else
1588         {
1589             /* Accessing depth / stencil surfaces is supposed to fail while in
1590              * a scene, except for fills, which seem to work. */
1591             if (device->inScene)
1592             {
1593                 WARN("Rejecting depth / stencil access while in scene.\n");
1594                 return WINED3DERR_INVALIDCALL;
1595             }
1596
1597             if (src_ds_flags != dst_ds_flags)
1598             {
1599                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1600                 return WINED3DERR_INVALIDCALL;
1601             }
1602
1603             if (src_rect.top || src_rect.left
1604                     || src_rect.bottom != src_surface->resource.height
1605                     || src_rect.right != src_surface->resource.width)
1606             {
1607                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1608                         wine_dbgstr_rect(&src_rect));
1609                 return WINED3DERR_INVALIDCALL;
1610             }
1611
1612             if (dst_rect.top || dst_rect.left
1613                     || dst_rect.bottom != dst_surface->resource.height
1614                     || dst_rect.right != dst_surface->resource.width)
1615             {
1616                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1617                         wine_dbgstr_rect(&src_rect));
1618                 return WINED3DERR_INVALIDCALL;
1619             }
1620
1621             if (scale)
1622             {
1623                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1624                 return WINED3DERR_INVALIDCALL;
1625             }
1626
1627             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1628                 return WINED3D_OK;
1629         }
1630     }
1631     else
1632     {
1633         /* In principle this would apply to depth blits as well, but we don't
1634          * implement those in the CPU blitter at the moment. */
1635         if ((dst_surface->flags & SFLAG_INSYSMEM)
1636                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1637         {
1638             if (scale)
1639                 TRACE("Not doing sysmem blit because of scaling.\n");
1640             else if (convert)
1641                 TRACE("Not doing sysmem blit because of format conversion.\n");
1642             else
1643                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1644         }
1645
1646         if (flags & WINEDDBLT_COLORFILL)
1647         {
1648             struct wined3d_color color;
1649
1650             TRACE("Color fill.\n");
1651
1652             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1653                 goto fallback;
1654
1655             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1656                 return WINED3D_OK;
1657         }
1658         else
1659         {
1660             TRACE("Color blit.\n");
1661
1662             /* Upload */
1663             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1664             {
1665                 if (scale)
1666                     TRACE("Not doing upload because of scaling.\n");
1667                 else if (convert)
1668                     TRACE("Not doing upload because of format conversion.\n");
1669                 else
1670                 {
1671                     POINT dst_point = {dst_rect.left, dst_rect.top};
1672
1673                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1674                     {
1675                         if (!surface_is_offscreen(dst_surface))
1676                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1677                         return WINED3D_OK;
1678                     }
1679                 }
1680             }
1681
1682             /* Use present for back -> front blits. The idea behind this is
1683              * that present is potentially faster than a blit, in particular
1684              * when FBO blits aren't available. Some ddraw applications like
1685              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1686              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1687              * applications can't blit directly to the frontbuffer. */
1688             if (dst_swapchain && dst_swapchain->back_buffers
1689                     && dst_surface == dst_swapchain->front_buffer
1690                     && src_surface == dst_swapchain->back_buffers[0])
1691             {
1692                 WINED3DSWAPEFFECT swap_effect = dst_swapchain->desc.swap_effect;
1693
1694                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1695
1696                 /* Set the swap effect to COPY, we don't want the backbuffer
1697                  * to become undefined. */
1698                 dst_swapchain->desc.swap_effect = WINED3DSWAPEFFECT_COPY;
1699                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1700                 dst_swapchain->desc.swap_effect = swap_effect;
1701
1702                 return WINED3D_OK;
1703             }
1704
1705             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1706                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1707                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1708             {
1709                 TRACE("Using FBO blit.\n");
1710
1711                 surface_blt_fbo(device, filter,
1712                         src_surface, src_surface->draw_binding, &src_rect,
1713                         dst_surface, dst_surface->draw_binding, &dst_rect);
1714                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1715                 return WINED3D_OK;
1716             }
1717
1718             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1719                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1720                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1721             {
1722                 TRACE("Using arbfp blit.\n");
1723
1724                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1725                     return WINED3D_OK;
1726             }
1727         }
1728     }
1729
1730 fallback:
1731
1732     /* Special cases for render targets. */
1733     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1734             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1735     {
1736         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1737                 src_surface, &src_rect, flags, fx, filter)))
1738             return WINED3D_OK;
1739     }
1740
1741 cpu:
1742
1743     /* For the rest call the X11 surface implementation. For render targets
1744      * this should be implemented OpenGL accelerated in BltOverride, other
1745      * blits are rather rare. */
1746     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1747 }
1748
1749 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1750         struct wined3d_surface *render_target)
1751 {
1752     TRACE("surface %p, render_target %p.\n", surface, render_target);
1753
1754     /* TODO: Check surface sizes, pools, etc. */
1755
1756     if (render_target->resource.multisample_type)
1757         return WINED3DERR_INVALIDCALL;
1758
1759     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3DTEXF_POINT);
1760 }
1761
1762 /* Context activation is done by the caller. */
1763 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1764 {
1765     if (surface->flags & SFLAG_DIBSECTION)
1766     {
1767         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1768     }
1769     else if (!surface->resource.heapMemory)
1770     {
1771         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1772         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1773                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1774     }
1775
1776     ENTER_GL();
1777     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1778     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1779     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1780             surface->resource.size, surface->resource.allocatedMemory));
1781     checkGLcall("glGetBufferSubDataARB");
1782     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1783     checkGLcall("glDeleteBuffersARB");
1784     LEAVE_GL();
1785
1786     surface->pbo = 0;
1787     surface->flags &= ~SFLAG_PBO;
1788 }
1789
1790 /* Do not call while under the GL lock. */
1791 static void surface_unload(struct wined3d_resource *resource)
1792 {
1793     struct wined3d_surface *surface = surface_from_resource(resource);
1794     struct wined3d_renderbuffer_entry *entry, *entry2;
1795     struct wined3d_device *device = resource->device;
1796     const struct wined3d_gl_info *gl_info;
1797     struct wined3d_context *context;
1798
1799     TRACE("surface %p.\n", surface);
1800
1801     if (resource->pool == WINED3DPOOL_DEFAULT)
1802     {
1803         /* Default pool resources are supposed to be destroyed before Reset is called.
1804          * Implicit resources stay however. So this means we have an implicit render target
1805          * or depth stencil. The content may be destroyed, but we still have to tear down
1806          * opengl resources, so we cannot leave early.
1807          *
1808          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1809          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1810          * or the depth stencil into an FBO the texture or render buffer will be removed
1811          * and all flags get lost
1812          */
1813         surface_init_sysmem(surface);
1814         /* We also get here when the ddraw swapchain is destroyed, for example
1815          * for a mode switch. In this case this surface won't necessarily be
1816          * an implicit surface. We have to mark it lost so that the
1817          * application can restore it after the mode switch. */
1818         surface->flags |= SFLAG_LOST;
1819     }
1820     else
1821     {
1822         /* Load the surface into system memory */
1823         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1824         surface_modify_location(surface, surface->draw_binding, FALSE);
1825     }
1826     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1827     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1828     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1829
1830     context = context_acquire(device, NULL);
1831     gl_info = context->gl_info;
1832
1833     /* Destroy PBOs, but load them into real sysmem before */
1834     if (surface->flags & SFLAG_PBO)
1835         surface_remove_pbo(surface, gl_info);
1836
1837     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1838      * all application-created targets the application has to release the surface
1839      * before calling _Reset
1840      */
1841     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1842     {
1843         ENTER_GL();
1844         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1845         LEAVE_GL();
1846         list_remove(&entry->entry);
1847         HeapFree(GetProcessHeap(), 0, entry);
1848     }
1849     list_init(&surface->renderbuffers);
1850     surface->current_renderbuffer = NULL;
1851
1852     ENTER_GL();
1853
1854     /* If we're in a texture, the texture name belongs to the texture.
1855      * Otherwise, destroy it. */
1856     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1857     {
1858         glDeleteTextures(1, &surface->texture_name);
1859         surface->texture_name = 0;
1860         glDeleteTextures(1, &surface->texture_name_srgb);
1861         surface->texture_name_srgb = 0;
1862     }
1863     if (surface->rb_multisample)
1864     {
1865         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1866         surface->rb_multisample = 0;
1867     }
1868     if (surface->rb_resolved)
1869     {
1870         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1871         surface->rb_resolved = 0;
1872     }
1873
1874     LEAVE_GL();
1875
1876     context_release(context);
1877
1878     resource_unload(resource);
1879 }
1880
1881 static const struct wined3d_resource_ops surface_resource_ops =
1882 {
1883     surface_unload,
1884 };
1885
1886 static const struct wined3d_surface_ops surface_ops =
1887 {
1888     surface_private_setup,
1889     surface_realize_palette,
1890     surface_map,
1891     surface_unmap,
1892 };
1893
1894 /*****************************************************************************
1895  * Initializes the GDI surface, aka creates the DIB section we render to
1896  * The DIB section creation is done by calling GetDC, which will create the
1897  * section and releasing the dc to allow the app to use it. The dib section
1898  * will stay until the surface is released
1899  *
1900  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1901  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1902  * avoid confusion in the shared surface code.
1903  *
1904  * Returns:
1905  *  WINED3D_OK on success
1906  *  The return values of called methods on failure
1907  *
1908  *****************************************************************************/
1909 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1910 {
1911     HRESULT hr;
1912
1913     TRACE("surface %p.\n", surface);
1914
1915     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1916     {
1917         ERR("Overlays not yet supported by GDI surfaces.\n");
1918         return WINED3DERR_INVALIDCALL;
1919     }
1920
1921     /* Sysmem textures have memory already allocated - release it,
1922      * this avoids an unnecessary memcpy. */
1923     hr = surface_create_dib_section(surface);
1924     if (SUCCEEDED(hr))
1925     {
1926         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1927         surface->resource.heapMemory = NULL;
1928         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1929     }
1930
1931     /* We don't mind the nonpow2 stuff in GDI. */
1932     surface->pow2Width = surface->resource.width;
1933     surface->pow2Height = surface->resource.height;
1934
1935     return WINED3D_OK;
1936 }
1937
1938 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1939 {
1940     struct wined3d_palette *palette = surface->palette;
1941
1942     TRACE("surface %p.\n", surface);
1943
1944     if (!palette) return;
1945
1946     if (surface->flags & SFLAG_DIBSECTION)
1947     {
1948         RGBQUAD col[256];
1949         unsigned int i;
1950
1951         TRACE("Updating the DC's palette.\n");
1952
1953         for (i = 0; i < 256; ++i)
1954         {
1955             col[i].rgbRed = palette->palents[i].peRed;
1956             col[i].rgbGreen = palette->palents[i].peGreen;
1957             col[i].rgbBlue = palette->palents[i].peBlue;
1958             col[i].rgbReserved = 0;
1959         }
1960         SetDIBColorTable(surface->hDC, 0, 256, col);
1961     }
1962
1963     /* Update the image because of the palette change. Some games like e.g.
1964      * Red Alert call SetEntries a lot to implement fading. */
1965     /* Tell the swapchain to update the screen. */
1966     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1967     {
1968         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1969         if (surface == swapchain->front_buffer)
1970         {
1971             x11_copy_to_screen(swapchain, NULL);
1972         }
1973     }
1974 }
1975
1976 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1977 {
1978     TRACE("surface %p, rect %s, flags %#x.\n",
1979             surface, wine_dbgstr_rect(rect), flags);
1980
1981     if (!(surface->flags & SFLAG_DIBSECTION))
1982     {
1983         /* This happens on gdi surfaces if the application set a user pointer
1984          * and resets it. Recreate the DIB section. */
1985         surface_create_dib_section(surface);
1986         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1987     }
1988 }
1989
1990 static void gdi_surface_unmap(struct wined3d_surface *surface)
1991 {
1992     TRACE("surface %p.\n", surface);
1993
1994     /* Tell the swapchain to update the screen. */
1995     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1996     {
1997         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1998         if (surface == swapchain->front_buffer)
1999         {
2000             x11_copy_to_screen(swapchain, &surface->lockedRect);
2001         }
2002     }
2003
2004     memset(&surface->lockedRect, 0, sizeof(RECT));
2005 }
2006
2007 static const struct wined3d_surface_ops gdi_surface_ops =
2008 {
2009     gdi_surface_private_setup,
2010     gdi_surface_realize_palette,
2011     gdi_surface_map,
2012     gdi_surface_unmap,
2013 };
2014
2015 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2016 {
2017     GLuint *name;
2018     DWORD flag;
2019
2020     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2021
2022     if(srgb)
2023     {
2024         name = &surface->texture_name_srgb;
2025         flag = SFLAG_INSRGBTEX;
2026     }
2027     else
2028     {
2029         name = &surface->texture_name;
2030         flag = SFLAG_INTEXTURE;
2031     }
2032
2033     if (!*name && new_name)
2034     {
2035         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2036          * surface has no texture name yet. See if we can get rid of this. */
2037         if (surface->flags & flag)
2038             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2039         surface_modify_location(surface, flag, FALSE);
2040     }
2041
2042     *name = new_name;
2043     surface_force_reload(surface);
2044 }
2045
2046 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2047 {
2048     TRACE("surface %p, target %#x.\n", surface, target);
2049
2050     if (surface->texture_target != target)
2051     {
2052         if (target == GL_TEXTURE_RECTANGLE_ARB)
2053         {
2054             surface->flags &= ~SFLAG_NORMCOORD;
2055         }
2056         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2057         {
2058             surface->flags |= SFLAG_NORMCOORD;
2059         }
2060     }
2061     surface->texture_target = target;
2062     surface_force_reload(surface);
2063 }
2064
2065 /* Context activation is done by the caller. */
2066 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2067 {
2068     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2069
2070     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2071     {
2072         struct wined3d_texture *texture = surface->container.u.texture;
2073
2074         TRACE("Passing to container (%p).\n", texture);
2075         texture->texture_ops->texture_bind(texture, context, srgb);
2076     }
2077     else
2078     {
2079         if (surface->texture_level)
2080         {
2081             ERR("Standalone surface %p is non-zero texture level %u.\n",
2082                     surface, surface->texture_level);
2083         }
2084
2085         if (srgb)
2086             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2087
2088         ENTER_GL();
2089
2090         if (!surface->texture_name)
2091         {
2092             glGenTextures(1, &surface->texture_name);
2093             checkGLcall("glGenTextures");
2094
2095             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2096
2097             context_bind_texture(context, surface->texture_target, surface->texture_name);
2098             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2099             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2100             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2101             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2102             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2103             checkGLcall("glTexParameteri");
2104         }
2105         else
2106         {
2107             context_bind_texture(context, surface->texture_target, surface->texture_name);
2108         }
2109
2110         LEAVE_GL();
2111     }
2112 }
2113
2114 /* This call just downloads data, the caller is responsible for binding the
2115  * correct texture. */
2116 /* Context activation is done by the caller. */
2117 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2118 {
2119     const struct wined3d_format *format = surface->resource.format;
2120
2121     /* Only support read back of converted P8 surfaces. */
2122     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2123     {
2124         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2125         return;
2126     }
2127
2128     ENTER_GL();
2129
2130     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2131     {
2132         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2133                 surface, surface->texture_level, format->glFormat, format->glType,
2134                 surface->resource.allocatedMemory);
2135
2136         if (surface->flags & SFLAG_PBO)
2137         {
2138             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2139             checkGLcall("glBindBufferARB");
2140             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2141             checkGLcall("glGetCompressedTexImageARB");
2142             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2143             checkGLcall("glBindBufferARB");
2144         }
2145         else
2146         {
2147             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2148                     surface->texture_level, surface->resource.allocatedMemory));
2149             checkGLcall("glGetCompressedTexImageARB");
2150         }
2151
2152         LEAVE_GL();
2153     }
2154     else
2155     {
2156         void *mem;
2157         GLenum gl_format = format->glFormat;
2158         GLenum gl_type = format->glType;
2159         int src_pitch = 0;
2160         int dst_pitch = 0;
2161
2162         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2163         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2164         {
2165             gl_format = GL_ALPHA;
2166             gl_type = GL_UNSIGNED_BYTE;
2167         }
2168
2169         if (surface->flags & SFLAG_NONPOW2)
2170         {
2171             unsigned char alignment = surface->resource.device->surface_alignment;
2172             src_pitch = format->byte_count * surface->pow2Width;
2173             dst_pitch = wined3d_surface_get_pitch(surface);
2174             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2175             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2176         }
2177         else
2178         {
2179             mem = surface->resource.allocatedMemory;
2180         }
2181
2182         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2183                 surface, surface->texture_level, gl_format, gl_type, mem);
2184
2185         if (surface->flags & SFLAG_PBO)
2186         {
2187             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2188             checkGLcall("glBindBufferARB");
2189
2190             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2191             checkGLcall("glGetTexImage");
2192
2193             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2194             checkGLcall("glBindBufferARB");
2195         }
2196         else
2197         {
2198             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2199             checkGLcall("glGetTexImage");
2200         }
2201         LEAVE_GL();
2202
2203         if (surface->flags & SFLAG_NONPOW2)
2204         {
2205             const BYTE *src_data;
2206             BYTE *dst_data;
2207             UINT y;
2208             /*
2209              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2210              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2211              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2212              *
2213              * We're doing this...
2214              *
2215              * instead of boxing the texture :
2216              * |<-texture width ->|  -->pow2width|   /\
2217              * |111111111111111111|              |   |
2218              * |222 Texture 222222| boxed empty  | texture height
2219              * |3333 Data 33333333|              |   |
2220              * |444444444444444444|              |   \/
2221              * -----------------------------------   |
2222              * |     boxed  empty | boxed empty  | pow2height
2223              * |                  |              |   \/
2224              * -----------------------------------
2225              *
2226              *
2227              * we're repacking the data to the expected texture width
2228              *
2229              * |<-texture width ->|  -->pow2width|   /\
2230              * |111111111111111111222222222222222|   |
2231              * |222333333333333333333444444444444| texture height
2232              * |444444                           |   |
2233              * |                                 |   \/
2234              * |                                 |   |
2235              * |            empty                | pow2height
2236              * |                                 |   \/
2237              * -----------------------------------
2238              *
2239              * == is the same as
2240              *
2241              * |<-texture width ->|    /\
2242              * |111111111111111111|
2243              * |222222222222222222|texture height
2244              * |333333333333333333|
2245              * |444444444444444444|    \/
2246              * --------------------
2247              *
2248              * this also means that any references to allocatedMemory should work with the data as if were a
2249              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2250              *
2251              * internally the texture is still stored in a boxed format so any references to textureName will
2252              * get a boxed texture with width pow2width and not a texture of width resource.width.
2253              *
2254              * Performance should not be an issue, because applications normally do not lock the surfaces when
2255              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2256              * and doesn't have to be re-read. */
2257             src_data = mem;
2258             dst_data = surface->resource.allocatedMemory;
2259             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2260             for (y = 1; y < surface->resource.height; ++y)
2261             {
2262                 /* skip the first row */
2263                 src_data += src_pitch;
2264                 dst_data += dst_pitch;
2265                 memcpy(dst_data, src_data, dst_pitch);
2266             }
2267
2268             HeapFree(GetProcessHeap(), 0, mem);
2269         }
2270     }
2271
2272     /* Surface has now been downloaded */
2273     surface->flags |= SFLAG_INSYSMEM;
2274 }
2275
2276 /* This call just uploads data, the caller is responsible for binding the
2277  * correct texture. */
2278 /* Context activation is done by the caller. */
2279 static void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2280         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2281         BOOL srgb, const struct wined3d_bo_address *data)
2282 {
2283     UINT update_w = src_rect->right - src_rect->left;
2284     UINT update_h = src_rect->bottom - src_rect->top;
2285
2286     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2287             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2288             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2289
2290     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2291         update_h *= format->heightscale;
2292
2293     ENTER_GL();
2294
2295     if (data->buffer_object)
2296     {
2297         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2298         checkGLcall("glBindBufferARB");
2299     }
2300
2301     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2302     {
2303         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2304         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2305         const BYTE *addr = data->addr;
2306         GLenum internal;
2307
2308         addr += (src_rect->top / format->block_height) * src_pitch;
2309         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2310
2311         if (srgb)
2312             internal = format->glGammaInternal;
2313         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2314             internal = format->rtInternal;
2315         else
2316             internal = format->glInternal;
2317
2318         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2319                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2320                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2321
2322         if (row_length == src_pitch)
2323         {
2324             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2325                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2326         }
2327         else
2328         {
2329             UINT row, y;
2330
2331             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2332              * can't use the unpack row length like below. */
2333             for (row = 0, y = dst_point->y; row < row_count; ++row)
2334             {
2335                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2336                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2337                 y += format->block_height;
2338                 addr += src_pitch;
2339             }
2340         }
2341         checkGLcall("glCompressedTexSubImage2DARB");
2342     }
2343     else
2344     {
2345         const BYTE *addr = data->addr;
2346
2347         addr += src_rect->top * src_pitch;
2348         addr += src_rect->left * format->byte_count;
2349
2350         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2351                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2352                 update_w, update_h, format->glFormat, format->glType, addr);
2353
2354         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2355         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2356                 update_w, update_h, format->glFormat, format->glType, addr);
2357         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2358         checkGLcall("glTexSubImage2D");
2359     }
2360
2361     if (data->buffer_object)
2362     {
2363         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2364         checkGLcall("glBindBufferARB");
2365     }
2366
2367     LEAVE_GL();
2368
2369     if (wined3d_settings.strict_draw_ordering)
2370         wglFlush();
2371
2372     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2373     {
2374         struct wined3d_device *device = surface->resource.device;
2375         unsigned int i;
2376
2377         for (i = 0; i < device->context_count; ++i)
2378         {
2379             context_surface_update(device->contexts[i], surface);
2380         }
2381     }
2382 }
2383
2384 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2385         struct wined3d_surface *src_surface, const RECT *src_rect)
2386 {
2387     const struct wined3d_format *src_format;
2388     const struct wined3d_format *dst_format;
2389     const struct wined3d_gl_info *gl_info;
2390     struct wined3d_context *context;
2391     struct wined3d_bo_address data;
2392     struct wined3d_format format;
2393     UINT update_w, update_h;
2394     CONVERT_TYPES convert;
2395     UINT dst_w, dst_h;
2396     UINT src_w, src_h;
2397     UINT src_pitch;
2398     POINT p;
2399     RECT r;
2400
2401     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2402             dst_surface, wine_dbgstr_point(dst_point),
2403             src_surface, wine_dbgstr_rect(src_rect));
2404
2405     src_format = src_surface->resource.format;
2406     dst_format = dst_surface->resource.format;
2407
2408     if (src_format->id != dst_format->id)
2409     {
2410         WARN("Source and destination surfaces should have the same format.\n");
2411         return WINED3DERR_INVALIDCALL;
2412     }
2413
2414     if (!dst_point)
2415     {
2416         p.x = 0;
2417         p.y = 0;
2418         dst_point = &p;
2419     }
2420     else if (dst_point->x < 0 || dst_point->y < 0)
2421     {
2422         WARN("Invalid destination point.\n");
2423         return WINED3DERR_INVALIDCALL;
2424     }
2425
2426     if (!src_rect)
2427     {
2428         r.left = 0;
2429         r.top = 0;
2430         r.right = src_surface->resource.width;
2431         r.bottom = src_surface->resource.height;
2432         src_rect = &r;
2433     }
2434     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2435             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2436     {
2437         WARN("Invalid source rectangle.\n");
2438         return WINED3DERR_INVALIDCALL;
2439     }
2440
2441     src_w = src_surface->resource.width;
2442     src_h = src_surface->resource.height;
2443
2444     dst_w = dst_surface->resource.width;
2445     dst_h = dst_surface->resource.height;
2446
2447     update_w = src_rect->right - src_rect->left;
2448     update_h = src_rect->bottom - src_rect->top;
2449
2450     if (update_w > dst_w || dst_point->x > dst_w - update_w
2451             || update_h > dst_h || dst_point->y > dst_h - update_h)
2452     {
2453         WARN("Destination out of bounds.\n");
2454         return WINED3DERR_INVALIDCALL;
2455     }
2456
2457     /* NPOT block sizes would be silly. */
2458     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2459             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2460             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2461     {
2462         WARN("Update rect not block-aligned.\n");
2463         return WINED3DERR_INVALIDCALL;
2464     }
2465
2466     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2467     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2468     if (convert != NO_CONVERSION || format.convert)
2469     {
2470         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2471         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3DTEXF_POINT);
2472     }
2473
2474     context = context_acquire(dst_surface->resource.device, NULL);
2475     gl_info = context->gl_info;
2476
2477     /* Only load the surface for partial updates. For newly allocated texture
2478      * the texture wouldn't be the current location, and we'd upload zeroes
2479      * just to overwrite them again. */
2480     if (update_w == dst_w && update_h == dst_h)
2481         surface_prepare_texture(dst_surface, context, FALSE);
2482     else
2483         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2484     surface_bind(dst_surface, context, FALSE);
2485
2486     data.buffer_object = src_surface->pbo;
2487     data.addr = src_surface->resource.allocatedMemory;
2488     src_pitch = wined3d_surface_get_pitch(src_surface);
2489
2490     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2491
2492     invalidate_active_texture(dst_surface->resource.device, context);
2493
2494     context_release(context);
2495
2496     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2497     return WINED3D_OK;
2498 }
2499
2500 /* This call just allocates the texture, the caller is responsible for binding
2501  * the correct texture. */
2502 /* Context activation is done by the caller. */
2503 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2504         const struct wined3d_format *format, BOOL srgb)
2505 {
2506     BOOL enable_client_storage = FALSE;
2507     GLsizei width = surface->pow2Width;
2508     GLsizei height = surface->pow2Height;
2509     const BYTE *mem = NULL;
2510     GLenum internal;
2511
2512     if (srgb)
2513     {
2514         internal = format->glGammaInternal;
2515     }
2516     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2517     {
2518         internal = format->rtInternal;
2519     }
2520     else
2521     {
2522         internal = format->glInternal;
2523     }
2524
2525     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2526
2527     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2528             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2529             internal, width, height, format->glFormat, format->glType);
2530
2531     ENTER_GL();
2532
2533     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2534     {
2535         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2536                 || !surface->resource.allocatedMemory)
2537         {
2538             /* In some cases we want to disable client storage.
2539              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2540              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2541              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2542              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2543              */
2544             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2545             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2546             surface->flags &= ~SFLAG_CLIENT;
2547             enable_client_storage = TRUE;
2548         }
2549         else
2550         {
2551             surface->flags |= SFLAG_CLIENT;
2552
2553             /* Point OpenGL to our allocated texture memory. Do not use
2554              * resource.allocatedMemory here because it might point into a
2555              * PBO. Instead use heapMemory, but get the alignment right. */
2556             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2557                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2558         }
2559     }
2560
2561     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2562     {
2563         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2564                 internal, width, height, 0, surface->resource.size, mem));
2565         checkGLcall("glCompressedTexImage2DARB");
2566     }
2567     else
2568     {
2569         glTexImage2D(surface->texture_target, surface->texture_level,
2570                 internal, width, height, 0, format->glFormat, format->glType, mem);
2571         checkGLcall("glTexImage2D");
2572     }
2573
2574     if(enable_client_storage) {
2575         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2576         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2577     }
2578     LEAVE_GL();
2579 }
2580
2581 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2582  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2583 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2584 /* GL locking is done by the caller */
2585 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2586 {
2587     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2588     struct wined3d_renderbuffer_entry *entry;
2589     GLuint renderbuffer = 0;
2590     unsigned int src_width, src_height;
2591     unsigned int width, height;
2592
2593     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2594     {
2595         width = rt->pow2Width;
2596         height = rt->pow2Height;
2597     }
2598     else
2599     {
2600         width = surface->pow2Width;
2601         height = surface->pow2Height;
2602     }
2603
2604     src_width = surface->pow2Width;
2605     src_height = surface->pow2Height;
2606
2607     /* A depth stencil smaller than the render target is not valid */
2608     if (width > src_width || height > src_height) return;
2609
2610     /* Remove any renderbuffer set if the sizes match */
2611     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2612             || (width == src_width && height == src_height))
2613     {
2614         surface->current_renderbuffer = NULL;
2615         return;
2616     }
2617
2618     /* Look if we've already got a renderbuffer of the correct dimensions */
2619     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2620     {
2621         if (entry->width == width && entry->height == height)
2622         {
2623             renderbuffer = entry->id;
2624             surface->current_renderbuffer = entry;
2625             break;
2626         }
2627     }
2628
2629     if (!renderbuffer)
2630     {
2631         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2632         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2633         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2634                 surface->resource.format->glInternal, width, height);
2635
2636         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2637         entry->width = width;
2638         entry->height = height;
2639         entry->id = renderbuffer;
2640         list_add_head(&surface->renderbuffers, &entry->entry);
2641
2642         surface->current_renderbuffer = entry;
2643     }
2644
2645     checkGLcall("set_compatible_renderbuffer");
2646 }
2647
2648 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2649 {
2650     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2651
2652     TRACE("surface %p.\n", surface);
2653
2654     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2655     {
2656         ERR("Surface %p is not on a swapchain.\n", surface);
2657         return GL_NONE;
2658     }
2659
2660     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2661     {
2662         if (swapchain->render_to_fbo)
2663         {
2664             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2665             return GL_COLOR_ATTACHMENT0;
2666         }
2667         TRACE("Returning GL_BACK\n");
2668         return GL_BACK;
2669     }
2670     else if (surface == swapchain->front_buffer)
2671     {
2672         TRACE("Returning GL_FRONT\n");
2673         return GL_FRONT;
2674     }
2675
2676     FIXME("Higher back buffer, returning GL_BACK\n");
2677     return GL_BACK;
2678 }
2679
2680 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2681 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2682 {
2683     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2684
2685     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2686         /* No partial locking for textures yet. */
2687         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2688
2689     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2690     if (dirty_rect)
2691     {
2692         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2693         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2694         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2695         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2696     }
2697     else
2698     {
2699         surface->dirtyRect.left = 0;
2700         surface->dirtyRect.top = 0;
2701         surface->dirtyRect.right = surface->resource.width;
2702         surface->dirtyRect.bottom = surface->resource.height;
2703     }
2704
2705     /* if the container is a texture then mark it dirty. */
2706     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2707     {
2708         TRACE("Passing to container.\n");
2709         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2710     }
2711 }
2712
2713 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2714 {
2715     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2716     BOOL ck_changed;
2717
2718     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2719
2720     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2721     {
2722         ERR("Not supported on scratch surfaces.\n");
2723         return WINED3DERR_INVALIDCALL;
2724     }
2725
2726     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2727
2728     /* Reload if either the texture and sysmem have different ideas about the
2729      * color key, or the actual key values changed. */
2730     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2731             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2732             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2733     {
2734         TRACE("Reloading because of color keying\n");
2735         /* To perform the color key conversion we need a sysmem copy of
2736          * the surface. Make sure we have it. */
2737
2738         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2739         /* Make sure the texture is reloaded because of the color key change,
2740          * this kills performance though :( */
2741         /* TODO: This is not necessarily needed with hw palettized texture support. */
2742         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2743         /* Switching color keying on / off may change the internal format. */
2744         if (ck_changed)
2745             surface_force_reload(surface);
2746     }
2747     else if (!(surface->flags & flag))
2748     {
2749         TRACE("Reloading because surface is dirty.\n");
2750     }
2751     else
2752     {
2753         TRACE("surface is already in texture\n");
2754         return WINED3D_OK;
2755     }
2756
2757     /* No partial locking for textures yet. */
2758     surface_load_location(surface, flag, NULL);
2759     surface_evict_sysmem(surface);
2760
2761     return WINED3D_OK;
2762 }
2763
/* Converts a 32-bit float to a 16-bit half float (1 sign bit, 5 exponent
 * bits with excess 15, 10 mantissa bits). See also float_16_to_32() in
 * wined3d_private.h
 *
 * in: pointer to the value to convert.
 *
 * Returns the half float bit pattern. Zero (of either sign) gives 0x0000,
 * NaN gives 0x7c01, values too large for a half float give infinity and
 * values too small give a denormal or zero. The mantissa is rounded to
 * nearest, with ties rounded away from zero. */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp;
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Work on the magnitude, the sign is added back at the end. */
    tmp = *in < 0.0f ? -*in : *in;

    /* Scale the value into [2^10, 2^11), so that its integer part is the
     * 11 bit mantissa including the implicit leading one. At most one of
     * these loops runs. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        --exp;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        ++exp;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up 2047.5 and above overflows the 11 bit mantissa. Carry
     * into the exponent, otherwise masking below would drop the top bit and
     * the result would be half of what it should be. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2826
2827 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2828 {
2829     ULONG refcount;
2830
2831     TRACE("Surface %p, container %p of type %#x.\n",
2832             surface, surface->container.u.base, surface->container.type);
2833
2834     switch (surface->container.type)
2835     {
2836         case WINED3D_CONTAINER_TEXTURE:
2837             return wined3d_texture_incref(surface->container.u.texture);
2838
2839         case WINED3D_CONTAINER_SWAPCHAIN:
2840             return wined3d_swapchain_incref(surface->container.u.swapchain);
2841
2842         default:
2843             ERR("Unhandled container type %#x.\n", surface->container.type);
2844         case WINED3D_CONTAINER_NONE:
2845             break;
2846     }
2847
2848     refcount = InterlockedIncrement(&surface->resource.ref);
2849     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2850
2851     return refcount;
2852 }
2853
2854 /* Do not call while under the GL lock. */
2855 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2856 {
2857     ULONG refcount;
2858
2859     TRACE("Surface %p, container %p of type %#x.\n",
2860             surface, surface->container.u.base, surface->container.type);
2861
2862     switch (surface->container.type)
2863     {
2864         case WINED3D_CONTAINER_TEXTURE:
2865             return wined3d_texture_decref(surface->container.u.texture);
2866
2867         case WINED3D_CONTAINER_SWAPCHAIN:
2868             return wined3d_swapchain_decref(surface->container.u.swapchain);
2869
2870         default:
2871             ERR("Unhandled container type %#x.\n", surface->container.type);
2872         case WINED3D_CONTAINER_NONE:
2873             break;
2874     }
2875
2876     refcount = InterlockedDecrement(&surface->resource.ref);
2877     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2878
2879     if (!refcount)
2880     {
2881         surface_cleanup(surface);
2882         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2883
2884         TRACE("Destroyed surface %p.\n", surface);
2885         HeapFree(GetProcessHeap(), 0, surface);
2886     }
2887
2888     return refcount;
2889 }
2890
2891 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2892 {
2893     return resource_set_priority(&surface->resource, priority);
2894 }
2895
2896 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2897 {
2898     return resource_get_priority(&surface->resource);
2899 }
2900
2901 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2902 {
2903     TRACE("surface %p.\n", surface);
2904
2905     if (!surface->resource.device->d3d_initialized)
2906     {
2907         ERR("D3D not initialized.\n");
2908         return;
2909     }
2910
2911     surface_internal_preload(surface, SRGB_ANY);
2912 }
2913
2914 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2915 {
2916     TRACE("surface %p.\n", surface);
2917
2918     return surface->resource.parent;
2919 }
2920
2921 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2922 {
2923     TRACE("surface %p.\n", surface);
2924
2925     return &surface->resource;
2926 }
2927
2928 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2929 {
2930     TRACE("surface %p, flags %#x.\n", surface, flags);
2931
2932     switch (flags)
2933     {
2934         case WINEDDGBS_CANBLT:
2935         case WINEDDGBS_ISBLTDONE:
2936             return WINED3D_OK;
2937
2938         default:
2939             return WINED3DERR_INVALIDCALL;
2940     }
2941 }
2942
2943 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2944 {
2945     TRACE("surface %p, flags %#x.\n", surface, flags);
2946
2947     /* XXX: DDERR_INVALIDSURFACETYPE */
2948
2949     switch (flags)
2950     {
2951         case WINEDDGFS_CANFLIP:
2952         case WINEDDGFS_ISFLIPDONE:
2953             return WINED3D_OK;
2954
2955         default:
2956             return WINED3DERR_INVALIDCALL;
2957     }
2958 }
2959
2960 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2961 {
2962     TRACE("surface %p.\n", surface);
2963
2964     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2965     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2966 }
2967
2968 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2969 {
2970     TRACE("surface %p.\n", surface);
2971
2972     surface->flags &= ~SFLAG_LOST;
2973     return WINED3D_OK;
2974 }
2975
2976 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2977 {
2978     TRACE("surface %p, palette %p.\n", surface, palette);
2979
2980     if (surface->palette == palette)
2981     {
2982         TRACE("Nop palette change.\n");
2983         return WINED3D_OK;
2984     }
2985
2986     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2987         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2988
2989     surface->palette = palette;
2990
2991     if (palette)
2992     {
2993         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2994             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2995
2996         surface->surface_ops->surface_realize_palette(surface);
2997     }
2998
2999     return WINED3D_OK;
3000 }
3001
3002 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3003         DWORD flags, const struct wined3d_color_key *color_key)
3004 {
3005     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3006
3007     if (flags & WINEDDCKEY_COLORSPACE)
3008     {
3009         FIXME(" colorkey value not supported (%08x) !\n", flags);
3010         return WINED3DERR_INVALIDCALL;
3011     }
3012
3013     /* Dirtify the surface, but only if a key was changed. */
3014     if (color_key)
3015     {
3016         switch (flags & ~WINEDDCKEY_COLORSPACE)
3017         {
3018             case WINEDDCKEY_DESTBLT:
3019                 surface->dst_blt_color_key = *color_key;
3020                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3021                 break;
3022
3023             case WINEDDCKEY_DESTOVERLAY:
3024                 surface->dst_overlay_color_key = *color_key;
3025                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3026                 break;
3027
3028             case WINEDDCKEY_SRCOVERLAY:
3029                 surface->src_overlay_color_key = *color_key;
3030                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3031                 break;
3032
3033             case WINEDDCKEY_SRCBLT:
3034                 surface->src_blt_color_key = *color_key;
3035                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3036                 break;
3037         }
3038     }
3039     else
3040     {
3041         switch (flags & ~WINEDDCKEY_COLORSPACE)
3042         {
3043             case WINEDDCKEY_DESTBLT:
3044                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3045                 break;
3046
3047             case WINEDDCKEY_DESTOVERLAY:
3048                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3049                 break;
3050
3051             case WINEDDCKEY_SRCOVERLAY:
3052                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3053                 break;
3054
3055             case WINEDDCKEY_SRCBLT:
3056                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3057                 break;
3058         }
3059     }
3060
3061     return WINED3D_OK;
3062 }
3063
3064 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3065 {
3066     TRACE("surface %p.\n", surface);
3067
3068     return surface->palette;
3069 }
3070
3071 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3072 {
3073     const struct wined3d_format *format = surface->resource.format;
3074     DWORD pitch;
3075
3076     TRACE("surface %p.\n", surface);
3077
3078     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3079     {
3080         /* Since compressed formats are block based, pitch means the amount of
3081          * bytes to the next row of block rather than the next row of pixels. */
3082         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3083         pitch = row_block_count * format->block_byte_count;
3084     }
3085     else
3086     {
3087         unsigned char alignment = surface->resource.device->surface_alignment;
3088         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3089         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3090     }
3091
3092     TRACE("Returning %u.\n", pitch);
3093
3094     return pitch;
3095 }
3096
3097 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3098 {
3099     TRACE("surface %p, mem %p.\n", surface, mem);
3100
3101     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3102     {
3103         WARN("Surface is locked or the DC is in use.\n");
3104         return WINED3DERR_INVALIDCALL;
3105     }
3106
3107     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3108     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3109     {
3110         ERR("Not supported on render targets.\n");
3111         return WINED3DERR_INVALIDCALL;
3112     }
3113
3114     if (mem && mem != surface->resource.allocatedMemory)
3115     {
3116         void *release = NULL;
3117
3118         /* Do I have to copy the old surface content? */
3119         if (surface->flags & SFLAG_DIBSECTION)
3120         {
3121             DeleteDC(surface->hDC);
3122             DeleteObject(surface->dib.DIBsection);
3123             surface->dib.bitmap_data = NULL;
3124             surface->resource.allocatedMemory = NULL;
3125             surface->hDC = NULL;
3126             surface->flags &= ~SFLAG_DIBSECTION;
3127         }
3128         else if (!(surface->flags & SFLAG_USERPTR))
3129         {
3130             release = surface->resource.heapMemory;
3131             surface->resource.heapMemory = NULL;
3132         }
3133         surface->resource.allocatedMemory = mem;
3134         surface->flags |= SFLAG_USERPTR;
3135
3136         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3137         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3138
3139         /* For client textures OpenGL has to be notified. */
3140         if (surface->flags & SFLAG_CLIENT)
3141             surface_release_client_storage(surface);
3142
3143         /* Now free the old memory if any. */
3144         HeapFree(GetProcessHeap(), 0, release);
3145     }
3146     else if (surface->flags & SFLAG_USERPTR)
3147     {
3148         /* HeapMemory should be NULL already. */
3149         if (surface->resource.heapMemory)
3150             ERR("User pointer surface has heap memory allocated.\n");
3151
3152         if (!mem)
3153         {
3154             surface->resource.allocatedMemory = NULL;
3155             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3156
3157             if (surface->flags & SFLAG_CLIENT)
3158                 surface_release_client_storage(surface);
3159
3160             surface_prepare_system_memory(surface);
3161         }
3162
3163         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3164     }
3165
3166     return WINED3D_OK;
3167 }
3168
3169 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3170 {
3171     LONG w, h;
3172
3173     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3174
3175     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3176     {
3177         WARN("Not an overlay surface.\n");
3178         return WINEDDERR_NOTAOVERLAYSURFACE;
3179     }
3180
3181     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3182     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3183     surface->overlay_destrect.left = x;
3184     surface->overlay_destrect.top = y;
3185     surface->overlay_destrect.right = x + w;
3186     surface->overlay_destrect.bottom = y + h;
3187
3188     surface_draw_overlay(surface);
3189
3190     return WINED3D_OK;
3191 }
3192
3193 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3194 {
3195     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3196
3197     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3198     {
3199         TRACE("Not an overlay surface.\n");
3200         return WINEDDERR_NOTAOVERLAYSURFACE;
3201     }
3202
3203     if (!surface->overlay_dest)
3204     {
3205         TRACE("Overlay not visible.\n");
3206         *x = 0;
3207         *y = 0;
3208         return WINEDDERR_OVERLAYNOTVISIBLE;
3209     }
3210
3211     *x = surface->overlay_destrect.left;
3212     *y = surface->overlay_destrect.top;
3213
3214     TRACE("Returning position %d, %d.\n", *x, *y);
3215
3216     return WINED3D_OK;
3217 }
3218
3219 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3220         DWORD flags, struct wined3d_surface *ref)
3221 {
3222     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3223
3224     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3225     {
3226         TRACE("Not an overlay surface.\n");
3227         return WINEDDERR_NOTAOVERLAYSURFACE;
3228     }
3229
3230     return WINED3D_OK;
3231 }
3232
3233 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3234         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3235 {
3236     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3237             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3238
3239     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3240     {
3241         WARN("Not an overlay surface.\n");
3242         return WINEDDERR_NOTAOVERLAYSURFACE;
3243     }
3244     else if (!dst_surface)
3245     {
3246         WARN("Dest surface is NULL.\n");
3247         return WINED3DERR_INVALIDCALL;
3248     }
3249
3250     if (src_rect)
3251     {
3252         surface->overlay_srcrect = *src_rect;
3253     }
3254     else
3255     {
3256         surface->overlay_srcrect.left = 0;
3257         surface->overlay_srcrect.top = 0;
3258         surface->overlay_srcrect.right = surface->resource.width;
3259         surface->overlay_srcrect.bottom = surface->resource.height;
3260     }
3261
3262     if (dst_rect)
3263     {
3264         surface->overlay_destrect = *dst_rect;
3265     }
3266     else
3267     {
3268         surface->overlay_destrect.left = 0;
3269         surface->overlay_destrect.top = 0;
3270         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3271         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3272     }
3273
3274     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3275     {
3276         surface->overlay_dest = NULL;
3277         list_remove(&surface->overlay_entry);
3278     }
3279
3280     if (flags & WINEDDOVER_SHOW)
3281     {
3282         if (surface->overlay_dest != dst_surface)
3283         {
3284             surface->overlay_dest = dst_surface;
3285             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3286         }
3287     }
3288     else if (flags & WINEDDOVER_HIDE)
3289     {
3290         /* tests show that the rectangles are erased on hide */
3291         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3292         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3293         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3294         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3295         surface->overlay_dest = NULL;
3296     }
3297
3298     surface_draw_overlay(surface);
3299
3300     return WINED3D_OK;
3301 }
3302
3303 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3304 {
3305     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3306
3307     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3308
3309     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3310     {
3311         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3312         return WINED3DERR_INVALIDCALL;
3313     }
3314
3315     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3316             surface->pow2Width, surface->pow2Height);
3317     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3318     surface->resource.format = format;
3319
3320     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3321     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3322             format->glFormat, format->glInternal, format->glType);
3323
3324     return WINED3D_OK;
3325 }
3326
3327 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3328         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3329 {
3330     unsigned short *dst_s;
3331     const float *src_f;
3332     unsigned int x, y;
3333
3334     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3335
3336     for (y = 0; y < h; ++y)
3337     {
3338         src_f = (const float *)(src + y * pitch_in);
3339         dst_s = (unsigned short *) (dst + y * pitch_out);
3340         for (x = 0; x < w; ++x)
3341         {
3342             dst_s[x] = float_32_to_16(src_f + x);
3343         }
3344     }
3345 }
3346
3347 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3348         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3349 {
3350     static const unsigned char convert_5to8[] =
3351     {
3352         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3353         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3354         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3355         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3356     };
3357     static const unsigned char convert_6to8[] =
3358     {
3359         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3360         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3361         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3362         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3363         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3364         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3365         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3366         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3367     };
3368     unsigned int x, y;
3369
3370     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3371
3372     for (y = 0; y < h; ++y)
3373     {
3374         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3375         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3376         for (x = 0; x < w; ++x)
3377         {
3378             WORD pixel = src_line[x];
3379             dst_line[x] = 0xff000000
3380                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3381                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3382                     | convert_5to8[(pixel & 0x001f)];
3383         }
3384     }
3385 }
3386
3387 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3388  * in both cases we're just setting the X / Alpha channel to 0xff. */
3389 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3390         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3391 {
3392     unsigned int x, y;
3393
3394     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3395
3396     for (y = 0; y < h; ++y)
3397     {
3398         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3399         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3400
3401         for (x = 0; x < w; ++x)
3402         {
3403             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3404         }
3405     }
3406 }
3407
3408 static inline BYTE cliptobyte(int x)
3409 {
3410     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3411 }
3412
3413 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3414         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3415 {
3416     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3417     unsigned int x, y;
3418
3419     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3420
3421     for (y = 0; y < h; ++y)
3422     {
3423         const BYTE *src_line = src + y * pitch_in;
3424         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3425         for (x = 0; x < w; ++x)
3426         {
3427             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3428              *     C = Y - 16; D = U - 128; E = V - 128;
3429              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3430              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3431              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3432              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3433              * U and V are shared between the pixels. */
3434             if (!(x & 1)) /* For every even pixel, read new U and V. */
3435             {
3436                 d = (int) src_line[1] - 128;
3437                 e = (int) src_line[3] - 128;
3438                 r2 = 409 * e + 128;
3439                 g2 = - 100 * d - 208 * e + 128;
3440                 b2 = 516 * d + 128;
3441             }
3442             c2 = 298 * ((int) src_line[0] - 16);
3443             dst_line[x] = 0xff000000
3444                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3445                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3446                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3447                 /* Scale RGB values to 0..255 range,
3448                  * then clip them if still not in range (may be negative),
3449                  * then shift them within DWORD if necessary. */
3450             src_line += 2;
3451         }
3452     }
3453 }
3454
3455 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3456         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3457 {
3458     unsigned int x, y;
3459     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3460
3461     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3462
3463     for (y = 0; y < h; ++y)
3464     {
3465         const BYTE *src_line = src + y * pitch_in;
3466         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3467         for (x = 0; x < w; ++x)
3468         {
3469             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3470              *     C = Y - 16; D = U - 128; E = V - 128;
3471              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3472              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3473              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3474              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3475              * U and V are shared between the pixels. */
3476             if (!(x & 1)) /* For every even pixel, read new U and V. */
3477             {
3478                 d = (int) src_line[1] - 128;
3479                 e = (int) src_line[3] - 128;
3480                 r2 = 409 * e + 128;
3481                 g2 = - 100 * d - 208 * e + 128;
3482                 b2 = 516 * d + 128;
3483             }
3484             c2 = 298 * ((int) src_line[0] - 16);
3485             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3486                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3487                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3488                 /* Scale RGB values to 0..255 range,
3489                  * then clip them if still not in range (may be negative),
3490                  * then shift them within DWORD if necessary. */
3491             src_line += 2;
3492         }
3493     }
3494 }
3495
3496 struct d3dfmt_convertor_desc
3497 {
3498     enum wined3d_format_id from, to;
3499     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3500 };
3501
/* The CPU format conversions that are implemented, one
 * {source format, destination format, conversion routine} entry each.
 * The A8 -> X8 and X8 -> A8 entries deliberately share one routine. */
static const struct d3dfmt_convertor_desc convertors[] =
{
    {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
    {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
    {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
};
3511
3512 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3513         enum wined3d_format_id to)
3514 {
3515     unsigned int i;
3516
3517     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3518     {
3519         if (convertors[i].from == from && convertors[i].to == to)
3520             return &convertors[i];
3521     }
3522
3523     return NULL;
3524 }
3525
3526 /*****************************************************************************
3527  * surface_convert_format
3528  *
3529  * Creates a duplicate of a surface in a different format. Is used by Blt to
3530  * blit between surfaces with different formats.
3531  *
3532  * Parameters
3533  *  source: Source surface
3534  *  fmt: Requested destination format
3535  *
3536  *****************************************************************************/
3537 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3538 {
3539     struct wined3d_mapped_rect src_map, dst_map;
3540     const struct d3dfmt_convertor_desc *conv;
3541     struct wined3d_surface *ret = NULL;
3542     HRESULT hr;
3543
3544     conv = find_convertor(source->resource.format->id, to_fmt);
3545     if (!conv)
3546     {
3547         FIXME("Cannot find a conversion function from format %s to %s.\n",
3548                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3549         return NULL;
3550     }
3551
3552     wined3d_surface_create(source->resource.device, source->resource.width,
3553             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3DPOOL_SCRATCH,
3554             WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3555             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3556             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3557     if (!ret)
3558     {
3559         ERR("Failed to create a destination surface for conversion.\n");
3560         return NULL;
3561     }
3562
3563     memset(&src_map, 0, sizeof(src_map));
3564     memset(&dst_map, 0, sizeof(dst_map));
3565
3566     hr = wined3d_surface_map(source, &src_map, NULL, WINED3DLOCK_READONLY);
3567     if (FAILED(hr))
3568     {
3569         ERR("Failed to lock the source surface.\n");
3570         wined3d_surface_decref(ret);
3571         return NULL;
3572     }
3573     hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3DLOCK_READONLY);
3574     if (FAILED(hr))
3575     {
3576         ERR("Failed to lock the destination surface.\n");
3577         wined3d_surface_unmap(source);
3578         wined3d_surface_decref(ret);
3579         return NULL;
3580     }
3581
3582     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3583             source->resource.width, source->resource.height);
3584
3585     wined3d_surface_unmap(ret);
3586     wined3d_surface_unmap(source);
3587
3588     return ret;
3589 }
3590
3591 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3592         unsigned int bpp, UINT pitch, DWORD color)
3593 {
3594     BYTE *first;
3595     int x, y;
3596
3597     /* Do first row */
3598
3599 #define COLORFILL_ROW(type) \
3600 do { \
3601     type *d = (type *)buf; \
3602     for (x = 0; x < width; ++x) \
3603         d[x] = (type)color; \
3604 } while(0)
3605
3606     switch (bpp)
3607     {
3608         case 1:
3609             COLORFILL_ROW(BYTE);
3610             break;
3611
3612         case 2:
3613             COLORFILL_ROW(WORD);
3614             break;
3615
3616         case 3:
3617         {
3618             BYTE *d = buf;
3619             for (x = 0; x < width; ++x, d += 3)
3620             {
3621                 d[0] = (color      ) & 0xFF;
3622                 d[1] = (color >>  8) & 0xFF;
3623                 d[2] = (color >> 16) & 0xFF;
3624             }
3625             break;
3626         }
3627         case 4:
3628             COLORFILL_ROW(DWORD);
3629             break;
3630
3631         default:
3632             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3633             return WINED3DERR_NOTAVAILABLE;
3634     }
3635
3636 #undef COLORFILL_ROW
3637
3638     /* Now copy first row. */
3639     first = buf;
3640     for (y = 1; y < height; ++y)
3641     {
3642         buf += pitch;
3643         memcpy(buf, first, width * bpp);
3644     }
3645
3646     return WINED3D_OK;
3647 }
3648
3649 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3650 {
3651     TRACE("surface %p.\n", surface);
3652
3653     if (!(surface->flags & SFLAG_LOCKED))
3654     {
3655         WARN("Trying to unmap unmapped surface.\n");
3656         return WINEDDERR_NOTLOCKED;
3657     }
3658     surface->flags &= ~SFLAG_LOCKED;
3659
3660     surface->surface_ops->surface_unmap(surface);
3661
3662     return WINED3D_OK;
3663 }
3664
/* Map (lock) a surface, or a sub-rectangle of it, for CPU access.
 *
 * On success mapped_rect receives the row pitch and a pointer to the first
 * byte of the requested rectangle inside the surface's allocatedMemory, and
 * the rectangle is remembered in surface->lockedRect. A NULL rect maps the
 * whole surface. flags is passed on to the surface type specific map handler.
 *
 * Returns WINED3DERR_INVALIDCALL if the surface is already mapped, or if rect
 * is not block aligned on a block based format in the default pool. */
HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
        struct wined3d_mapped_rect *mapped_rect, const RECT *rect, DWORD flags)
{
    const struct wined3d_format *format = surface->resource.format;

    TRACE("surface %p, mapped_rect %p, rect %s, flags %#x.\n",
            surface, mapped_rect, wine_dbgstr_rect(rect), flags);

    if (surface->flags & SFLAG_LOCKED)
    {
        WARN("Surface is already mapped.\n");
        return WINED3DERR_INVALIDCALL;
    }
    /* Partial maps of block based formats: check the rectangle's alignment. */
    if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
            && rect && (rect->left || rect->top
            || rect->right != surface->resource.width
            || rect->bottom != surface->resource.height))
    {
        /* NOTE(review): the mask test assumes the block dimensions are powers
         * of two - confirm this holds for every block based format. */
        UINT width_mask = format->block_width - 1;
        UINT height_mask = format->block_height - 1;

        if ((rect->left & width_mask) || (rect->right & width_mask)
                || (rect->top & height_mask) || (rect->bottom & height_mask))
        {
            WARN("Map rect %s is misaligned for %ux%u blocks.\n",
                    wine_dbgstr_rect(rect), format->block_width, format->block_height);

            /* Only rejected for the default pool; other pools just get the
             * warning above. */
            if (surface->resource.pool == WINED3DPOOL_DEFAULT)
                return WINED3DERR_INVALIDCALL;
        }
    }

    surface->flags |= SFLAG_LOCKED;

    /* Mapping an unlockable surface is only warned about, not refused. */
    if (!(surface->flags & SFLAG_LOCKABLE))
        WARN("Trying to lock unlockable surface.\n");

    /* Performance optimization: Count how often a surface is mapped, if it is
     * mapped regularly do not throw away the system memory copy. This avoids
     * the need to download the surface from OpenGL all the time. The surface
     * is still downloaded if the OpenGL texture is changed. */
    if (!(surface->flags & SFLAG_DYNLOCK))
    {
        if (++surface->lockCount > MAXLOCKCOUNT)
        {
            TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
            surface->flags |= SFLAG_DYNLOCK;
        }
    }

    /* Surface type specific part of the map; allocatedMemory is read only
     * after this call. */
    surface->surface_ops->surface_map(surface, rect, flags);

    /* Formats flagged as having a broken pitch report an unaligned pitch of
     * width * byte_count instead of the regular surface pitch. */
    if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
        mapped_rect->row_pitch = surface->resource.width * format->byte_count;
    else
        mapped_rect->row_pitch = wined3d_surface_get_pitch(surface);

    if (!rect)
    {
        /* Whole surface. */
        mapped_rect->data = surface->resource.allocatedMemory;
        surface->lockedRect.left = 0;
        surface->lockedRect.top = 0;
        surface->lockedRect.right = surface->resource.width;
        surface->lockedRect.bottom = surface->resource.height;
    }
    else
    {
        if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
        {
            /* Compressed textures are block based, so calculate the offset of
             * the block that contains the top-left pixel of the locked rectangle. */
            mapped_rect->data = surface->resource.allocatedMemory
                    + ((rect->top / format->block_height) * mapped_rect->row_pitch)
                    + ((rect->left / format->block_width) * format->block_byte_count);
        }
        else
        {
            /* Plain per-pixel addressing. */
            mapped_rect->data = surface->resource.allocatedMemory
                    + (mapped_rect->row_pitch * rect->top)
                    + (rect->left * format->byte_count);
        }
        surface->lockedRect.left = rect->left;
        surface->lockedRect.top = rect->top;
        surface->lockedRect.right = rect->right;
        surface->lockedRect.bottom = rect->bottom;
    }

    TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
    TRACE("Returning memory %p, pitch %u.\n", mapped_rect->data, mapped_rect->row_pitch);

    return WINED3D_OK;
}
3757
/* Hand out a GDI device context for the surface.
 *
 * A DIB section is created on first use if the surface has no DC yet. The
 * surface is mapped for as long as the DC is handed out, and SFLAG_DCINUSE
 * is set on success.
 *
 * Returns WINEDDERR_NODC for surfaces using application-provided memory,
 * WINEDDERR_DCALREADYCREATED if the DC is already in use,
 * WINED3DERR_INVALIDCALL if the surface is mapped or the DIB section can't be
 * created, or the error from wined3d_surface_map(). */
HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
{
    struct wined3d_mapped_rect map;
    HRESULT hr;

    TRACE("surface %p, dc %p.\n", surface, dc);

    if (surface->flags & SFLAG_USERPTR)
    {
        ERR("Not supported on surfaces with application-provided memory.\n");
        return WINEDDERR_NODC;
    }

    /* Give more detailed info for ddraw. */
    if (surface->flags & SFLAG_DCINUSE)
        return WINEDDERR_DCALREADYCREATED;

    /* Can't GetDC if the surface is locked. */
    if (surface->flags & SFLAG_LOCKED)
        return WINED3DERR_INVALIDCALL;

    /* Create a DIB section if there isn't a dc yet. */
    if (!surface->hDC)
    {
        /* For client storage surfaces, load the system memory copy and
         * release the client storage before the DIB section is created. */
        if (surface->flags & SFLAG_CLIENT)
        {
            surface_load_location(surface, SFLAG_INSYSMEM, NULL);
            surface_release_client_storage(surface);
        }
        hr = surface_create_dib_section(surface);
        if (FAILED(hr))
            return WINED3DERR_INVALIDCALL;

        /* Use the DIB section from now on if we are not using a PBO. */
        if (!(surface->flags & SFLAG_PBO))
            surface->resource.allocatedMemory = surface->dib.bitmap_data;
    }

    /* Map the surface. */
    hr = wined3d_surface_map(surface, &map, NULL, 0);
    if (FAILED(hr))
    {
        ERR("Map failed, hr %#x.\n", hr);
        return hr;
    }

    /* Sync the DIB with the PBO. This can't be done earlier because Map()
     * activates the allocatedMemory. */
    if (surface->flags & SFLAG_PBO)
        memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);

    if (surface->resource.format->id == WINED3DFMT_P8_UINT
            || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
    {
        /* GetDC on palettized formats is unsupported in D3D9, and the method
         * is missing in D3D8, so this should only be used for DX <=7
         * surfaces (with non-device palettes). */
        const PALETTEENTRY *pal = NULL;

        if (surface->palette)
        {
            pal = surface->palette->palents;
        }
        else
        {
            /* Without a palette of its own, fall back to the palette of the
             * first swapchain's front buffer, if there is one. */
            struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
            struct wined3d_surface *dds_primary = swapchain->front_buffer;

            if (dds_primary && dds_primary->palette)
                pal = dds_primary->palette->palents;
        }

        if (pal)
        {
            /* Copy the 256 palette entries into the DC's DIB color table. */
            RGBQUAD col[256];
            unsigned int i;

            for (i = 0; i < 256; ++i)
            {
                col[i].rgbRed = pal[i].peRed;
                col[i].rgbGreen = pal[i].peGreen;
                col[i].rgbBlue = pal[i].peBlue;
                col[i].rgbReserved = 0;
            }
            SetDIBColorTable(surface->hDC, 0, 256, col);
        }
    }

    surface->flags |= SFLAG_DCINUSE;

    *dc = surface->hDC;
    TRACE("Returning dc %p.\n", *dc);

    return WINED3D_OK;
}
3853
3854 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3855 {
3856     TRACE("surface %p, dc %p.\n", surface, dc);
3857
3858     if (!(surface->flags & SFLAG_DCINUSE))
3859         return WINEDDERR_NODC;
3860
3861     if (surface->hDC != dc)
3862     {
3863         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3864                 dc, surface->hDC);
3865         return WINEDDERR_NODC;
3866     }
3867
3868     /* Copy the contents of the DIB over to the PBO. */
3869     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3870         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3871
3872     /* We locked first, so unlock now. */
3873     wined3d_surface_unmap(surface);
3874
3875     surface->flags &= ~SFLAG_DCINUSE;
3876
3877     return WINED3D_OK;
3878 }
3879
3880 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3881 {
3882     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3883
3884     if (flags)
3885     {
3886         static UINT once;
3887         if (!once++)
3888             FIXME("Ignoring flags %#x.\n", flags);
3889         else
3890             WARN("Ignoring flags %#x.\n", flags);
3891     }
3892
3893     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3894     {
3895         ERR("Not supported on swapchain surfaces.\n");
3896         return WINEDDERR_NOTFLIPPABLE;
3897     }
3898
3899     /* Flipping is only supported on render targets and overlays. */
3900     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3901     {
3902         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3903         return WINEDDERR_NOTFLIPPABLE;
3904     }
3905
3906     flip_surface(surface, override);
3907
3908     /* Update overlays if they're visible. */
3909     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3910         return surface_draw_overlay(surface);
3911
3912     return WINED3D_OK;
3913 }
3914
3915 /* Do not call while under the GL lock. */
3916 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3917 {
3918     struct wined3d_device *device = surface->resource.device;
3919
3920     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3921
3922     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3923     {
3924         struct wined3d_texture *texture = surface->container.u.texture;
3925
3926         TRACE("Passing to container (%p).\n", texture);
3927         texture->texture_ops->texture_preload(texture, srgb);
3928     }
3929     else
3930     {
3931         struct wined3d_context *context;
3932
3933         TRACE("(%p) : About to load surface\n", surface);
3934
3935         /* TODO: Use already acquired context when possible. */
3936         context = context_acquire(device, NULL);
3937
3938         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3939
3940         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3941         {
3942             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3943             GLclampf tmp;
3944             tmp = 0.9f;
3945             ENTER_GL();
3946             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3947             LEAVE_GL();
3948         }
3949
3950         context_release(context);
3951     }
3952 }
3953
3954 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3955 {
3956     if (!surface->resource.allocatedMemory)
3957     {
3958         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3959                 surface->resource.size + RESOURCE_ALIGNMENT);
3960         if (!surface->resource.heapMemory)
3961         {
3962             ERR("Out of memory\n");
3963             return FALSE;
3964         }
3965         surface->resource.allocatedMemory =
3966             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3967     }
3968     else
3969     {
3970         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3971     }
3972
3973     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3974
3975     return TRUE;
3976 }
3977
3978 /* Read the framebuffer back into the surface */
3979 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3980 {
3981     struct wined3d_device *device = surface->resource.device;
3982     const struct wined3d_gl_info *gl_info;
3983     struct wined3d_context *context;
3984     BYTE *mem;
3985     GLint fmt;
3986     GLint type;
3987     BYTE *row, *top, *bottom;
3988     int i;
3989     BOOL bpp;
3990     RECT local_rect;
3991     BOOL srcIsUpsideDown;
3992     GLint rowLen = 0;
3993     GLint skipPix = 0;
3994     GLint skipRow = 0;
3995
3996     context = context_acquire(device, surface);
3997     context_apply_blit_state(context, device);
3998     gl_info = context->gl_info;
3999
4000     ENTER_GL();
4001
4002     /* Select the correct read buffer, and give some debug output.
4003      * There is no need to keep track of the current read buffer or reset it, every part of the code
4004      * that reads sets the read buffer as desired.
4005      */
4006     if (surface_is_offscreen(surface))
4007     {
4008         /* Mapping the primary render target which is not on a swapchain.
4009          * Read from the back buffer. */
4010         TRACE("Mapping offscreen render target.\n");
4011         glReadBuffer(device->offscreenBuffer);
4012         srcIsUpsideDown = TRUE;
4013     }
4014     else
4015     {
4016         /* Onscreen surfaces are always part of a swapchain */
4017         GLenum buffer = surface_get_gl_buffer(surface);
4018         TRACE("Mapping %#x buffer.\n", buffer);
4019         glReadBuffer(buffer);
4020         checkGLcall("glReadBuffer");
4021         srcIsUpsideDown = FALSE;
4022     }
4023
4024     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4025     if (!rect)
4026     {
4027         local_rect.left = 0;
4028         local_rect.top = 0;
4029         local_rect.right = surface->resource.width;
4030         local_rect.bottom = surface->resource.height;
4031     }
4032     else
4033     {
4034         local_rect = *rect;
4035     }
4036     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4037
4038     switch (surface->resource.format->id)
4039     {
4040         case WINED3DFMT_P8_UINT:
4041         {
4042             if (primary_render_target_is_p8(device))
4043             {
4044                 /* In case of P8 render targets the index is stored in the alpha component */
4045                 fmt = GL_ALPHA;
4046                 type = GL_UNSIGNED_BYTE;
4047                 mem = dest;
4048                 bpp = surface->resource.format->byte_count;
4049             }
4050             else
4051             {
4052                 /* GL can't return palettized data, so read ARGB pixels into a
4053                  * separate block of memory and convert them into palettized format
4054                  * in software. Slow, but if the app means to use palettized render
4055                  * targets and locks it...
4056                  *
4057                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4058                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4059                  * for the color channels when palettizing the colors.
4060                  */
4061                 fmt = GL_RGB;
4062                 type = GL_UNSIGNED_BYTE;
4063                 pitch *= 3;
4064                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4065                 if (!mem)
4066                 {
4067                     ERR("Out of memory\n");
4068                     LEAVE_GL();
4069                     return;
4070                 }
4071                 bpp = surface->resource.format->byte_count * 3;
4072             }
4073         }
4074         break;
4075
4076         default:
4077             mem = dest;
4078             fmt = surface->resource.format->glFormat;
4079             type = surface->resource.format->glType;
4080             bpp = surface->resource.format->byte_count;
4081     }
4082
4083     if (surface->flags & SFLAG_PBO)
4084     {
4085         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4086         checkGLcall("glBindBufferARB");
4087         if (mem)
4088         {
4089             ERR("mem not null for pbo -- unexpected\n");
4090             mem = NULL;
4091         }
4092     }
4093
4094     /* Save old pixel store pack state */
4095     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4096     checkGLcall("glGetIntegerv");
4097     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4098     checkGLcall("glGetIntegerv");
4099     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4100     checkGLcall("glGetIntegerv");
4101
4102     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4103     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4104     checkGLcall("glPixelStorei");
4105     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4106     checkGLcall("glPixelStorei");
4107     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4108     checkGLcall("glPixelStorei");
4109
4110     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4111             local_rect.right - local_rect.left,
4112             local_rect.bottom - local_rect.top,
4113             fmt, type, mem);
4114     checkGLcall("glReadPixels");
4115
4116     /* Reset previous pixel store pack state */
4117     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4118     checkGLcall("glPixelStorei");
4119     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4120     checkGLcall("glPixelStorei");
4121     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4122     checkGLcall("glPixelStorei");
4123
4124     if (surface->flags & SFLAG_PBO)
4125     {
4126         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4127         checkGLcall("glBindBufferARB");
4128
4129         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4130          * to get a pointer to it and perform the flipping in software. This is a lot
4131          * faster than calling glReadPixels for each line. In case we want more speed
4132          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4133         if (!srcIsUpsideDown)
4134         {
4135             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4136             checkGLcall("glBindBufferARB");
4137
4138             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4139             checkGLcall("glMapBufferARB");
4140         }
4141     }
4142
4143     /* TODO: Merge this with the palettization loop below for P8 targets */
4144     if(!srcIsUpsideDown) {
4145         UINT len, off;
4146         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4147             Flip the lines in software */
4148         len = (local_rect.right - local_rect.left) * bpp;
4149         off = local_rect.left * bpp;
4150
4151         row = HeapAlloc(GetProcessHeap(), 0, len);
4152         if(!row) {
4153             ERR("Out of memory\n");
4154             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4155                 HeapFree(GetProcessHeap(), 0, mem);
4156             LEAVE_GL();
4157             return;
4158         }
4159
4160         top = mem + pitch * local_rect.top;
4161         bottom = mem + pitch * (local_rect.bottom - 1);
4162         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4163             memcpy(row, top + off, len);
4164             memcpy(top + off, bottom + off, len);
4165             memcpy(bottom + off, row, len);
4166             top += pitch;
4167             bottom -= pitch;
4168         }
4169         HeapFree(GetProcessHeap(), 0, row);
4170
4171         /* Unmap the temp PBO buffer */
4172         if (surface->flags & SFLAG_PBO)
4173         {
4174             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4175             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4176         }
4177     }
4178
4179     LEAVE_GL();
4180     context_release(context);
4181
4182     /* For P8 textures we need to perform an inverse palette lookup. This is
4183      * done by searching for a palette index which matches the RGB value.
4184      * Note this isn't guaranteed to work when there are multiple entries for
4185      * the same color but we have no choice. In case of P8 render targets,
4186      * the index is stored in the alpha component so no conversion is needed. */
4187     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4188     {
4189         const PALETTEENTRY *pal = NULL;
4190         DWORD width = pitch / 3;
4191         int x, y, c;
4192
4193         if (surface->palette)
4194         {
4195             pal = surface->palette->palents;
4196         }
4197         else
4198         {
4199             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4200             HeapFree(GetProcessHeap(), 0, mem);
4201             return;
4202         }
4203
4204         for(y = local_rect.top; y < local_rect.bottom; y++) {
4205             for(x = local_rect.left; x < local_rect.right; x++) {
4206                 /*                      start              lines            pixels      */
4207                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4208                 const BYTE *green = blue  + 1;
4209                 const BYTE *red = green + 1;
4210
4211                 for(c = 0; c < 256; c++) {
4212                     if(*red   == pal[c].peRed   &&
4213                        *green == pal[c].peGreen &&
4214                        *blue  == pal[c].peBlue)
4215                     {
4216                         *((BYTE *) dest + y * width + x) = c;
4217                         break;
4218                     }
4219                 }
4220             }
4221         }
4222         HeapFree(GetProcessHeap(), 0, mem);
4223     }
4224 }
4225
4226 /* Read the framebuffer contents into a texture. Note that this function
4227  * doesn't do any kind of flipping. Using this on an onscreen surface will
4228  * result in a flipped D3D texture. */
4229 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4230 {
4231     struct wined3d_device *device = surface->resource.device;
4232     struct wined3d_context *context;
4233
4234     context = context_acquire(device, surface);
4235     device_invalidate_state(device, STATE_FRAMEBUFFER);
4236
4237     surface_prepare_texture(surface, context, srgb);
4238     surface_bind_and_dirtify(surface, context, srgb);
4239
4240     TRACE("Reading back offscreen render target %p.\n", surface);
4241
4242     ENTER_GL();
4243
4244     if (surface_is_offscreen(surface))
4245         glReadBuffer(device->offscreenBuffer);
4246     else
4247         glReadBuffer(surface_get_gl_buffer(surface));
4248     checkGLcall("glReadBuffer");
4249
4250     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4251             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4252     checkGLcall("glCopyTexSubImage2D");
4253
4254     LEAVE_GL();
4255
4256     context_release(context);
4257 }
4258
4259 /* Context activation is done by the caller. */
4260 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4261         struct wined3d_context *context, BOOL srgb)
4262 {
4263     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4264     CONVERT_TYPES convert;
4265     struct wined3d_format format;
4266
4267     if (surface->flags & alloc_flag) return;
4268
4269     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4270     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4271     else surface->flags &= ~SFLAG_CONVERTED;
4272
4273     surface_bind_and_dirtify(surface, context, srgb);
4274     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4275     surface->flags |= alloc_flag;
4276 }
4277
4278 /* Context activation is done by the caller. */
4279 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4280 {
4281     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4282     {
4283         struct wined3d_texture *texture = surface->container.u.texture;
4284         UINT sub_count = texture->level_count * texture->layer_count;
4285         UINT i;
4286
4287         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4288
4289         for (i = 0; i < sub_count; ++i)
4290         {
4291             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4292             surface_prepare_texture_internal(s, context, srgb);
4293         }
4294
4295         return;
4296     }
4297
4298     surface_prepare_texture_internal(surface, context, srgb);
4299 }
4300
4301 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4302 {
4303     if (multisample)
4304     {
4305         if (surface->rb_multisample)
4306             return;
4307
4308         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4309         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4310         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4311                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4312         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4313     }
4314     else
4315     {
4316         if (surface->rb_resolved)
4317             return;
4318
4319         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4320         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4321         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4322                 surface->pow2Width, surface->pow2Height);
4323         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4324     }
4325 }
4326
4327 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4328         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4329 {
4330     struct wined3d_device *device = surface->resource.device;
4331     UINT pitch = wined3d_surface_get_pitch(surface);
4332     const struct wined3d_gl_info *gl_info;
4333     struct wined3d_context *context;
4334     RECT local_rect;
4335     UINT w, h;
4336
4337     surface_get_rect(surface, rect, &local_rect);
4338
4339     mem += local_rect.top * pitch + local_rect.left * bpp;
4340     w = local_rect.right - local_rect.left;
4341     h = local_rect.bottom - local_rect.top;
4342
4343     /* Activate the correct context for the render target */
4344     context = context_acquire(device, surface);
4345     context_apply_blit_state(context, device);
4346     gl_info = context->gl_info;
4347
4348     ENTER_GL();
4349
4350     if (!surface_is_offscreen(surface))
4351     {
4352         GLenum buffer = surface_get_gl_buffer(surface);
4353         TRACE("Unlocking %#x buffer.\n", buffer);
4354         context_set_draw_buffer(context, buffer);
4355
4356         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4357         glPixelZoom(1.0f, -1.0f);
4358     }
4359     else
4360     {
4361         /* Primary offscreen render target */
4362         TRACE("Offscreen render target.\n");
4363         context_set_draw_buffer(context, device->offscreenBuffer);
4364
4365         glPixelZoom(1.0f, 1.0f);
4366     }
4367
4368     glRasterPos3i(local_rect.left, local_rect.top, 1);
4369     checkGLcall("glRasterPos3i");
4370
4371     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4372     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4373
4374     if (surface->flags & SFLAG_PBO)
4375     {
4376         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4377         checkGLcall("glBindBufferARB");
4378     }
4379
4380     glDrawPixels(w, h, fmt, type, mem);
4381     checkGLcall("glDrawPixels");
4382
4383     if (surface->flags & SFLAG_PBO)
4384     {
4385         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4386         checkGLcall("glBindBufferARB");
4387     }
4388
4389     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4390     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4391
4392     LEAVE_GL();
4393
4394     if (wined3d_settings.strict_draw_ordering
4395             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4396             && surface->container.u.swapchain->front_buffer == surface))
4397         wglFlush();
4398
4399     context_release(context);
4400 }
4401
4402 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4403         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4404 {
4405     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4406     const struct wined3d_device *device = surface->resource.device;
4407     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4408     BOOL blit_supported = FALSE;
4409
4410     /* Copy the default values from the surface. Below we might perform fixups */
4411     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4412     *format = *surface->resource.format;
4413     *convert = NO_CONVERSION;
4414
4415     /* Ok, now look if we have to do any conversion */
4416     switch (surface->resource.format->id)
4417     {
4418         case WINED3DFMT_P8_UINT:
4419             /* Below the call to blit_supported is disabled for Wine 1.2
4420              * because the function isn't operating correctly yet. At the
4421              * moment 8-bit blits are handled in software and if certain GL
4422              * extensions are around, surface conversion is performed at
4423              * upload time. The blit_supported call recognizes it as a
4424              * destination fixup. This type of upload 'fixup' and 8-bit to
4425              * 8-bit blits need to be handled by the blit_shader.
4426              * TODO: get rid of this #if 0. */
4427 #if 0
4428             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4429                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4430                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4431 #endif
4432             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4433
4434             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4435              * texturing. Further also use conversion in case of color keying.
4436              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4437              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4438              * conflicts with this.
4439              */
4440             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4441                     || colorkey_active || !use_texturing)
4442             {
4443                 format->glFormat = GL_RGBA;
4444                 format->glInternal = GL_RGBA;
4445                 format->glType = GL_UNSIGNED_BYTE;
4446                 format->conv_byte_count = 4;
4447                 if (colorkey_active)
4448                     *convert = CONVERT_PALETTED_CK;
4449                 else
4450                     *convert = CONVERT_PALETTED;
4451             }
4452             break;
4453
4454         case WINED3DFMT_B2G3R3_UNORM:
4455             /* **********************
4456                 GL_UNSIGNED_BYTE_3_3_2
4457                 ********************** */
4458             if (colorkey_active) {
4459                 /* This texture format will never be used.. So do not care about color keying
4460                     up until the point in time it will be needed :-) */
4461                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4462             }
4463             break;
4464
4465         case WINED3DFMT_B5G6R5_UNORM:
4466             if (colorkey_active)
4467             {
4468                 *convert = CONVERT_CK_565;
4469                 format->glFormat = GL_RGBA;
4470                 format->glInternal = GL_RGB5_A1;
4471                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4472                 format->conv_byte_count = 2;
4473             }
4474             break;
4475
4476         case WINED3DFMT_B5G5R5X1_UNORM:
4477             if (colorkey_active)
4478             {
4479                 *convert = CONVERT_CK_5551;
4480                 format->glFormat = GL_BGRA;
4481                 format->glInternal = GL_RGB5_A1;
4482                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4483                 format->conv_byte_count = 2;
4484             }
4485             break;
4486
4487         case WINED3DFMT_B8G8R8_UNORM:
4488             if (colorkey_active)
4489             {
4490                 *convert = CONVERT_CK_RGB24;
4491                 format->glFormat = GL_RGBA;
4492                 format->glInternal = GL_RGBA8;
4493                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4494                 format->conv_byte_count = 4;
4495             }
4496             break;
4497
4498         case WINED3DFMT_B8G8R8X8_UNORM:
4499             if (colorkey_active)
4500             {
4501                 *convert = CONVERT_RGB32_888;
4502                 format->glFormat = GL_RGBA;
4503                 format->glInternal = GL_RGBA8;
4504                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4505                 format->conv_byte_count = 4;
4506             }
4507             break;
4508
4509         default:
4510             break;
4511     }
4512
4513     return WINED3D_OK;
4514 }
4515
4516 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4517 {
4518     /* FIXME: Is this really how color keys are supposed to work? I think it
4519      * makes more sense to compare the individual channels. */
4520     return color >= color_key->color_space_low_value
4521             && color <= color_key->color_space_high_value;
4522 }
4523
4524 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4525 {
4526     const struct wined3d_device *device = surface->resource.device;
4527     const struct wined3d_palette *pal = surface->palette;
4528     BOOL index_in_alpha = FALSE;
4529     unsigned int i;
4530
4531     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4532      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4533      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4534      * duplicate entries. Store the color key in the unused alpha component to speed the
4535      * download up and to make conversion unneeded. */
4536     index_in_alpha = primary_render_target_is_p8(device);
4537
4538     if (!pal)
4539     {
4540         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4541         if (index_in_alpha)
4542         {
4543             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4544              * there's no palette at this time. */
4545             for (i = 0; i < 256; i++) table[i][3] = i;
4546         }
4547     }
4548     else
4549     {
4550         TRACE("Using surface palette %p\n", pal);
4551         /* Get the surface's palette */
4552         for (i = 0; i < 256; ++i)
4553         {
4554             table[i][0] = pal->palents[i].peRed;
4555             table[i][1] = pal->palents[i].peGreen;
4556             table[i][2] = pal->palents[i].peBlue;
4557
4558             /* When index_in_alpha is set the palette index is stored in the
4559              * alpha component. In case of a readback we can then read
4560              * GL_ALPHA. Color keying is handled in BltOverride using a
4561              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4562              * color key itself is passed to glAlphaFunc in other cases the
4563              * alpha component of pixels that should be masked away is set to 0. */
4564             if (index_in_alpha)
4565                 table[i][3] = i;
4566             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4567                 table[i][3] = 0x00;
4568             else if (pal->flags & WINEDDPCAPS_ALPHA)
4569                 table[i][3] = pal->palents[i].peFlags;
4570             else
4571                 table[i][3] = 0xFF;
4572         }
4573     }
4574 }
4575
4576 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4577         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4578 {
4579     const BYTE *source;
4580     BYTE *dest;
4581     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4582
4583     switch (convert) {
4584         case NO_CONVERSION:
4585         {
4586             memcpy(dst, src, pitch * height);
4587             break;
4588         }
4589         case CONVERT_PALETTED:
4590         case CONVERT_PALETTED_CK:
4591         {
4592             BYTE table[256][4];
4593             unsigned int x, y;
4594
4595             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4596
4597             for (y = 0; y < height; y++)
4598             {
4599                 source = src + pitch * y;
4600                 dest = dst + outpitch * y;
4601                 /* This is an 1 bpp format, using the width here is fine */
4602                 for (x = 0; x < width; x++) {
4603                     BYTE color = *source++;
4604                     *dest++ = table[color][0];
4605                     *dest++ = table[color][1];
4606                     *dest++ = table[color][2];
4607                     *dest++ = table[color][3];
4608                 }
4609             }
4610         }
4611         break;
4612
4613         case CONVERT_CK_565:
4614         {
4615             /* Converting the 565 format in 5551 packed to emulate color-keying.
4616
4617               Note : in all these conversion, it would be best to average the averaging
4618                       pixels to get the color of the pixel that will be color-keyed to
4619                       prevent 'color bleeding'. This will be done later on if ever it is
4620                       too visible.
4621
4622               Note2: Nvidia documents say that their driver does not support alpha + color keying
4623                      on the same surface and disables color keying in such a case
4624             */
4625             unsigned int x, y;
4626             const WORD *Source;
4627             WORD *Dest;
4628
4629             TRACE("Color keyed 565\n");
4630
4631             for (y = 0; y < height; y++) {
4632                 Source = (const WORD *)(src + y * pitch);
4633                 Dest = (WORD *) (dst + y * outpitch);
4634                 for (x = 0; x < width; x++ ) {
4635                     WORD color = *Source++;
4636                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4637                     if (!color_in_range(&surface->src_blt_color_key, color))
4638                         *Dest |= 0x0001;
4639                     Dest++;
4640                 }
4641             }
4642         }
4643         break;
4644
4645         case CONVERT_CK_5551:
4646         {
4647             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4648             unsigned int x, y;
4649             const WORD *Source;
4650             WORD *Dest;
4651             TRACE("Color keyed 5551\n");
4652             for (y = 0; y < height; y++) {
4653                 Source = (const WORD *)(src + y * pitch);
4654                 Dest = (WORD *) (dst + y * outpitch);
4655                 for (x = 0; x < width; x++ ) {
4656                     WORD color = *Source++;
4657                     *Dest = color;
4658                     if (!color_in_range(&surface->src_blt_color_key, color))
4659                         *Dest |= (1 << 15);
4660                     else
4661                         *Dest &= ~(1 << 15);
4662                     Dest++;
4663                 }
4664             }
4665         }
4666         break;
4667
4668         case CONVERT_CK_RGB24:
4669         {
4670             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4671             unsigned int x, y;
4672             for (y = 0; y < height; y++)
4673             {
4674                 source = src + pitch * y;
4675                 dest = dst + outpitch * y;
4676                 for (x = 0; x < width; x++) {
4677                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4678                     DWORD dstcolor = color << 8;
4679                     if (!color_in_range(&surface->src_blt_color_key, color))
4680                         dstcolor |= 0xff;
4681                     *(DWORD*)dest = dstcolor;
4682                     source += 3;
4683                     dest += 4;
4684                 }
4685             }
4686         }
4687         break;
4688
4689         case CONVERT_RGB32_888:
4690         {
4691             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4692             unsigned int x, y;
4693             for (y = 0; y < height; y++)
4694             {
4695                 source = src + pitch * y;
4696                 dest = dst + outpitch * y;
4697                 for (x = 0; x < width; x++) {
4698                     DWORD color = 0xffffff & *(const DWORD*)source;
4699                     DWORD dstcolor = color << 8;
4700                     if (!color_in_range(&surface->src_blt_color_key, color))
4701                         dstcolor |= 0xff;
4702                     *(DWORD*)dest = dstcolor;
4703                     source += 4;
4704                     dest += 4;
4705                 }
4706             }
4707         }
4708         break;
4709
4710         default:
4711             ERR("Unsupported conversion type %#x.\n", convert);
4712     }
4713     return WINED3D_OK;
4714 }
4715
4716 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4717 {
4718     /* Flip the surface contents */
4719     /* Flip the DC */
4720     {
4721         HDC tmp;
4722         tmp = front->hDC;
4723         front->hDC = back->hDC;
4724         back->hDC = tmp;
4725     }
4726
4727     /* Flip the DIBsection */
4728     {
4729         HBITMAP tmp;
4730         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4731         tmp = front->dib.DIBsection;
4732         front->dib.DIBsection = back->dib.DIBsection;
4733         back->dib.DIBsection = tmp;
4734
4735         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4736         else front->flags &= ~SFLAG_DIBSECTION;
4737         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4738         else back->flags &= ~SFLAG_DIBSECTION;
4739     }
4740
4741     /* Flip the surface data */
4742     {
4743         void* tmp;
4744
4745         tmp = front->dib.bitmap_data;
4746         front->dib.bitmap_data = back->dib.bitmap_data;
4747         back->dib.bitmap_data = tmp;
4748
4749         tmp = front->resource.allocatedMemory;
4750         front->resource.allocatedMemory = back->resource.allocatedMemory;
4751         back->resource.allocatedMemory = tmp;
4752
4753         tmp = front->resource.heapMemory;
4754         front->resource.heapMemory = back->resource.heapMemory;
4755         back->resource.heapMemory = tmp;
4756     }
4757
4758     /* Flip the PBO */
4759     {
4760         GLuint tmp_pbo = front->pbo;
4761         front->pbo = back->pbo;
4762         back->pbo = tmp_pbo;
4763     }
4764
4765     /* Flip the opengl texture */
4766     {
4767         GLuint tmp;
4768
4769         tmp = back->texture_name;
4770         back->texture_name = front->texture_name;
4771         front->texture_name = tmp;
4772
4773         tmp = back->texture_name_srgb;
4774         back->texture_name_srgb = front->texture_name_srgb;
4775         front->texture_name_srgb = tmp;
4776
4777         tmp = back->rb_multisample;
4778         back->rb_multisample = front->rb_multisample;
4779         front->rb_multisample = tmp;
4780
4781         tmp = back->rb_resolved;
4782         back->rb_resolved = front->rb_resolved;
4783         front->rb_resolved = tmp;
4784
4785         resource_unload(&back->resource);
4786         resource_unload(&front->resource);
4787     }
4788
4789     {
4790         DWORD tmp_flags = back->flags;
4791         back->flags = front->flags;
4792         front->flags = tmp_flags;
4793     }
4794 }
4795
4796 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4797  * pixel copy calls. */
4798 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4799         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4800 {
4801     struct wined3d_device *device = dst_surface->resource.device;
4802     float xrel, yrel;
4803     UINT row;
4804     struct wined3d_context *context;
4805     BOOL upsidedown = FALSE;
4806     RECT dst_rect = *dst_rect_in;
4807
4808     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4809      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4810      */
4811     if(dst_rect.top > dst_rect.bottom) {
4812         UINT tmp = dst_rect.bottom;
4813         dst_rect.bottom = dst_rect.top;
4814         dst_rect.top = tmp;
4815         upsidedown = TRUE;
4816     }
4817
4818     context = context_acquire(device, src_surface);
4819     context_apply_blit_state(context, device);
4820     surface_internal_preload(dst_surface, SRGB_RGB);
4821     ENTER_GL();
4822
4823     /* Bind the target texture */
4824     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4825     if (surface_is_offscreen(src_surface))
4826     {
4827         TRACE("Reading from an offscreen target\n");
4828         upsidedown = !upsidedown;
4829         glReadBuffer(device->offscreenBuffer);
4830     }
4831     else
4832     {
4833         glReadBuffer(surface_get_gl_buffer(src_surface));
4834     }
4835     checkGLcall("glReadBuffer");
4836
4837     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4838     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4839
4840     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4841     {
4842         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4843
4844         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4845             ERR("Texture filtering not supported in direct blit\n");
4846         }
4847     }
4848     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4849             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4850     {
4851         ERR("Texture filtering not supported in direct blit\n");
4852     }
4853
4854     if (upsidedown
4855             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4856             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4857     {
4858         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4859
4860         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4861                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4862                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4863                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4864     }
4865     else
4866     {
4867         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4868         /* I have to process this row by row to swap the image,
4869          * otherwise it would be upside down, so stretching in y direction
4870          * doesn't cost extra time
4871          *
4872          * However, stretching in x direction can be avoided if not necessary
4873          */
4874         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4875             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4876             {
4877                 /* Well, that stuff works, but it's very slow.
4878                  * find a better way instead
4879                  */
4880                 UINT col;
4881
4882                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4883                 {
4884                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4885                             dst_rect.left + col /* x offset */, row /* y offset */,
4886                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4887                 }
4888             }
4889             else
4890             {
4891                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4892                         dst_rect.left /* x offset */, row /* y offset */,
4893                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4894             }
4895         }
4896     }
4897     checkGLcall("glCopyTexSubImage2D");
4898
4899     LEAVE_GL();
4900     context_release(context);
4901
4902     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4903      * path is never entered
4904      */
4905     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4906 }
4907
4908 /* Uses the hardware to stretch and flip the image */
4909 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4910         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4911 {
4912     struct wined3d_device *device = dst_surface->resource.device;
4913     struct wined3d_swapchain *src_swapchain = NULL;
4914     GLuint src, backup = 0;
4915     float left, right, top, bottom; /* Texture coordinates */
4916     UINT fbwidth = src_surface->resource.width;
4917     UINT fbheight = src_surface->resource.height;
4918     struct wined3d_context *context;
4919     GLenum drawBuffer = GL_BACK;
4920     GLenum texture_target;
4921     BOOL noBackBufferBackup;
4922     BOOL src_offscreen;
4923     BOOL upsidedown = FALSE;
4924     RECT dst_rect = *dst_rect_in;
4925
4926     TRACE("Using hwstretch blit\n");
4927     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4928     context = context_acquire(device, src_surface);
4929     context_apply_blit_state(context, device);
4930     surface_internal_preload(dst_surface, SRGB_RGB);
4931
4932     src_offscreen = surface_is_offscreen(src_surface);
4933     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4934     if (!noBackBufferBackup && !src_surface->texture_name)
4935     {
4936         /* Get it a description */
4937         surface_internal_preload(src_surface, SRGB_RGB);
4938     }
4939     ENTER_GL();
4940
4941     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4942      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4943      */
4944     if (context->aux_buffers >= 2)
4945     {
4946         /* Got more than one aux buffer? Use the 2nd aux buffer */
4947         drawBuffer = GL_AUX1;
4948     }
4949     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4950     {
4951         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4952         drawBuffer = GL_AUX0;
4953     }
4954
4955     if(noBackBufferBackup) {
4956         glGenTextures(1, &backup);
4957         checkGLcall("glGenTextures");
4958         context_bind_texture(context, GL_TEXTURE_2D, backup);
4959         texture_target = GL_TEXTURE_2D;
4960     } else {
4961         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4962          * we are reading from the back buffer, the backup can be used as source texture
4963          */
4964         texture_target = src_surface->texture_target;
4965         context_bind_texture(context, texture_target, src_surface->texture_name);
4966         glEnable(texture_target);
4967         checkGLcall("glEnable(texture_target)");
4968
4969         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4970         src_surface->flags &= ~SFLAG_INTEXTURE;
4971     }
4972
4973     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4974      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4975      */
4976     if(dst_rect.top > dst_rect.bottom) {
4977         UINT tmp = dst_rect.bottom;
4978         dst_rect.bottom = dst_rect.top;
4979         dst_rect.top = tmp;
4980         upsidedown = TRUE;
4981     }
4982
4983     if (src_offscreen)
4984     {
4985         TRACE("Reading from an offscreen target\n");
4986         upsidedown = !upsidedown;
4987         glReadBuffer(device->offscreenBuffer);
4988     }
4989     else
4990     {
4991         glReadBuffer(surface_get_gl_buffer(src_surface));
4992     }
4993
4994     /* TODO: Only back up the part that will be overwritten */
4995     glCopyTexSubImage2D(texture_target, 0,
4996                         0, 0 /* read offsets */,
4997                         0, 0,
4998                         fbwidth,
4999                         fbheight);
5000
5001     checkGLcall("glCopyTexSubImage2D");
5002
5003     /* No issue with overriding these - the sampler is dirty due to blit usage */
5004     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5005             wined3d_gl_mag_filter(magLookup, Filter));
5006     checkGLcall("glTexParameteri");
5007     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5008             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5009     checkGLcall("glTexParameteri");
5010
5011     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5012         src_swapchain = src_surface->container.u.swapchain;
5013     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5014     {
5015         src = backup ? backup : src_surface->texture_name;
5016     }
5017     else
5018     {
5019         glReadBuffer(GL_FRONT);
5020         checkGLcall("glReadBuffer(GL_FRONT)");
5021
5022         glGenTextures(1, &src);
5023         checkGLcall("glGenTextures(1, &src)");
5024         context_bind_texture(context, GL_TEXTURE_2D, src);
5025
5026         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5027          * out for power of 2 sizes
5028          */
5029         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5030                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5031         checkGLcall("glTexImage2D");
5032         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5033                             0, 0 /* read offsets */,
5034                             0, 0,
5035                             fbwidth,
5036                             fbheight);
5037
5038         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5039         checkGLcall("glTexParameteri");
5040         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5041         checkGLcall("glTexParameteri");
5042
5043         glReadBuffer(GL_BACK);
5044         checkGLcall("glReadBuffer(GL_BACK)");
5045
5046         if(texture_target != GL_TEXTURE_2D) {
5047             glDisable(texture_target);
5048             glEnable(GL_TEXTURE_2D);
5049             texture_target = GL_TEXTURE_2D;
5050         }
5051     }
5052     checkGLcall("glEnd and previous");
5053
5054     left = src_rect->left;
5055     right = src_rect->right;
5056
5057     if (!upsidedown)
5058     {
5059         top = src_surface->resource.height - src_rect->top;
5060         bottom = src_surface->resource.height - src_rect->bottom;
5061     }
5062     else
5063     {
5064         top = src_surface->resource.height - src_rect->bottom;
5065         bottom = src_surface->resource.height - src_rect->top;
5066     }
5067
5068     if (src_surface->flags & SFLAG_NORMCOORD)
5069     {
5070         left /= src_surface->pow2Width;
5071         right /= src_surface->pow2Width;
5072         top /= src_surface->pow2Height;
5073         bottom /= src_surface->pow2Height;
5074     }
5075
5076     /* draw the source texture stretched and upside down. The correct surface is bound already */
5077     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5078     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5079
5080     context_set_draw_buffer(context, drawBuffer);
5081     glReadBuffer(drawBuffer);
5082
5083     glBegin(GL_QUADS);
5084         /* bottom left */
5085         glTexCoord2f(left, bottom);
5086         glVertex2i(0, 0);
5087
5088         /* top left */
5089         glTexCoord2f(left, top);
5090         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5091
5092         /* top right */
5093         glTexCoord2f(right, top);
5094         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5095
5096         /* bottom right */
5097         glTexCoord2f(right, bottom);
5098         glVertex2i(dst_rect.right - dst_rect.left, 0);
5099     glEnd();
5100     checkGLcall("glEnd and previous");
5101
5102     if (texture_target != dst_surface->texture_target)
5103     {
5104         glDisable(texture_target);
5105         glEnable(dst_surface->texture_target);
5106         texture_target = dst_surface->texture_target;
5107     }
5108
5109     /* Now read the stretched and upside down image into the destination texture */
5110     context_bind_texture(context, texture_target, dst_surface->texture_name);
5111     glCopyTexSubImage2D(texture_target,
5112                         0,
5113                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5114                         0, 0, /* We blitted the image to the origin */
5115                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5116     checkGLcall("glCopyTexSubImage2D");
5117
5118     if(drawBuffer == GL_BACK) {
5119         /* Write the back buffer backup back */
5120         if(backup) {
5121             if(texture_target != GL_TEXTURE_2D) {
5122                 glDisable(texture_target);
5123                 glEnable(GL_TEXTURE_2D);
5124                 texture_target = GL_TEXTURE_2D;
5125             }
5126             context_bind_texture(context, GL_TEXTURE_2D, backup);
5127         }
5128         else
5129         {
5130             if (texture_target != src_surface->texture_target)
5131             {
5132                 glDisable(texture_target);
5133                 glEnable(src_surface->texture_target);
5134                 texture_target = src_surface->texture_target;
5135             }
5136             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5137         }
5138
5139         glBegin(GL_QUADS);
5140             /* top left */
5141             glTexCoord2f(0.0f, 0.0f);
5142             glVertex2i(0, fbheight);
5143
5144             /* bottom left */
5145             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5146             glVertex2i(0, 0);
5147
5148             /* bottom right */
5149             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5150                     (float)fbheight / (float)src_surface->pow2Height);
5151             glVertex2i(fbwidth, 0);
5152
5153             /* top right */
5154             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5155             glVertex2i(fbwidth, fbheight);
5156         glEnd();
5157     }
5158     glDisable(texture_target);
5159     checkGLcall("glDisable(texture_target)");
5160
5161     /* Cleanup */
5162     if (src != src_surface->texture_name && src != backup)
5163     {
5164         glDeleteTextures(1, &src);
5165         checkGLcall("glDeleteTextures(1, &src)");
5166     }
5167     if(backup) {
5168         glDeleteTextures(1, &backup);
5169         checkGLcall("glDeleteTextures(1, &backup)");
5170     }
5171
5172     LEAVE_GL();
5173
5174     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5175
5176     context_release(context);
5177
5178     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5179      * path is never entered
5180      */
5181     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5182 }
5183
5184 /* Front buffer coordinates are always full screen coordinates, but our GL
5185  * drawable is limited to the window's client area. The sysmem and texture
5186  * copies do have the full screen size. Note that GL has a bottom-left
5187  * origin, while D3D has a top-left origin. */
5188 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5189 {
5190     UINT drawable_height;
5191
5192     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5193             && surface == surface->container.u.swapchain->front_buffer)
5194     {
5195         POINT offset = {0, 0};
5196         RECT windowsize;
5197
5198         ScreenToClient(window, &offset);
5199         OffsetRect(rect, offset.x, offset.y);
5200
5201         GetClientRect(window, &windowsize);
5202         drawable_height = windowsize.bottom - windowsize.top;
5203     }
5204     else
5205     {
5206         drawable_height = surface->resource.height;
5207     }
5208
5209     rect->top = drawable_height - rect->top;
5210     rect->bottom = drawable_height - rect->bottom;
5211 }
5212
5213 static void surface_blt_to_drawable(const struct wined3d_device *device,
5214         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5215         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5216         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5217 {
5218     struct wined3d_context *context;
5219     RECT src_rect, dst_rect;
5220
5221     src_rect = *src_rect_in;
5222     dst_rect = *dst_rect_in;
5223
5224     /* Make sure the surface is up-to-date. This should probably use
5225      * surface_load_location() and worry about the destination surface too,
5226      * unless we're overwriting it completely. */
5227     surface_internal_preload(src_surface, SRGB_RGB);
5228
5229     /* Activate the destination context, set it up for blitting */
5230     context = context_acquire(device, dst_surface);
5231     context_apply_blit_state(context, device);
5232
5233     if (!surface_is_offscreen(dst_surface))
5234         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5235
5236     device->blitter->set_shader(device->blit_priv, context, src_surface);
5237
5238     ENTER_GL();
5239
5240     if (color_key)
5241     {
5242         glEnable(GL_ALPHA_TEST);
5243         checkGLcall("glEnable(GL_ALPHA_TEST)");
5244
5245         /* When the primary render target uses P8, the alpha component
5246          * contains the palette index. Which means that the colorkey is one of
5247          * the palette entries. In other cases pixels that should be masked
5248          * away have alpha set to 0. */
5249         if (primary_render_target_is_p8(device))
5250             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5251         else
5252             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5253         checkGLcall("glAlphaFunc");
5254     }
5255     else
5256     {
5257         glDisable(GL_ALPHA_TEST);
5258         checkGLcall("glDisable(GL_ALPHA_TEST)");
5259     }
5260
5261     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5262
5263     if (color_key)
5264     {
5265         glDisable(GL_ALPHA_TEST);
5266         checkGLcall("glDisable(GL_ALPHA_TEST)");
5267     }
5268
5269     LEAVE_GL();
5270
5271     /* Leave the opengl state valid for blitting */
5272     device->blitter->unset_shader(context->gl_info);
5273
5274     if (wined3d_settings.strict_draw_ordering
5275             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5276             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5277         wglFlush(); /* Flush to ensure ordering across contexts. */
5278
5279     context_release(context);
5280 }
5281
5282 /* Do not call while under the GL lock. */
5283 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5284 {
5285     struct wined3d_device *device = s->resource.device;
5286     const struct blit_shader *blitter;
5287
5288     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5289             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5290     if (!blitter)
5291     {
5292         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5293         return WINED3DERR_INVALIDCALL;
5294     }
5295
5296     return blitter->color_fill(device, s, rect, color);
5297 }
5298
5299 /* Do not call while under the GL lock. */
5300 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5301         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5302         WINED3DTEXTUREFILTERTYPE Filter)
5303 {
5304     struct wined3d_device *device = dst_surface->resource.device;
5305     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5306     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5307
5308     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5309             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5310             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5311
5312     /* Get the swapchain. One of the surfaces has to be a primary surface */
5313     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5314     {
5315         WARN("Destination is in sysmem, rejecting gl blt\n");
5316         return WINED3DERR_INVALIDCALL;
5317     }
5318
5319     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5320         dstSwapchain = dst_surface->container.u.swapchain;
5321
5322     if (src_surface)
5323     {
5324         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5325         {
5326             WARN("Src is in sysmem, rejecting gl blt\n");
5327             return WINED3DERR_INVALIDCALL;
5328         }
5329
5330         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5331             srcSwapchain = src_surface->container.u.swapchain;
5332     }
5333
5334     /* Early sort out of cases where no render target is used */
5335     if (!dstSwapchain && !srcSwapchain
5336             && src_surface != device->fb.render_targets[0]
5337             && dst_surface != device->fb.render_targets[0])
5338     {
5339         TRACE("No surface is render target, not using hardware blit.\n");
5340         return WINED3DERR_INVALIDCALL;
5341     }
5342
5343     /* No destination color keying supported */
5344     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5345     {
5346         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5347         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5348         return WINED3DERR_INVALIDCALL;
5349     }
5350
5351     if (dstSwapchain && dstSwapchain == srcSwapchain)
5352     {
5353         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5354         return WINED3DERR_INVALIDCALL;
5355     }
5356
5357     if (dstSwapchain && srcSwapchain)
5358     {
5359         FIXME("Implement hardware blit between two different swapchains\n");
5360         return WINED3DERR_INVALIDCALL;
5361     }
5362
5363     if (dstSwapchain)
5364     {
5365         /* Handled with regular texture -> swapchain blit */
5366         if (src_surface == device->fb.render_targets[0])
5367             TRACE("Blit from active render target to a swapchain\n");
5368     }
5369     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5370     {
5371         FIXME("Implement blit from a swapchain to the active render target\n");
5372         return WINED3DERR_INVALIDCALL;
5373     }
5374
5375     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5376     {
5377         /* Blit from render target to texture */
5378         BOOL stretchx;
5379
5380         /* P8 read back is not implemented */
5381         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5382                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5383         {
5384             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5385             return WINED3DERR_INVALIDCALL;
5386         }
5387
5388         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5389         {
5390             TRACE("Color keying not supported by frame buffer to texture blit\n");
5391             return WINED3DERR_INVALIDCALL;
5392             /* Destination color key is checked above */
5393         }
5394
5395         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5396             stretchx = TRUE;
5397         else
5398             stretchx = FALSE;
5399
5400         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5401          * flip the image nor scale it.
5402          *
5403          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5404          * -> If the app wants a image width an unscaled width, copy it line per line
5405          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5406          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5407          *    back buffer. This is slower than reading line per line, thus not used for flipping
5408          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5409          *    pixel by pixel. */
5410         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5411                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5412         {
5413             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5414             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5415         } else {
5416             TRACE("Using hardware stretching to flip / stretch the texture\n");
5417             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5418         }
5419
5420         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5421         {
5422             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5423             dst_surface->resource.allocatedMemory = NULL;
5424             dst_surface->resource.heapMemory = NULL;
5425         }
5426         else
5427         {
5428             dst_surface->flags &= ~SFLAG_INSYSMEM;
5429         }
5430
5431         return WINED3D_OK;
5432     }
5433     else if (src_surface)
5434     {
5435         /* Blit from offscreen surface to render target */
5436         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5437         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5438
5439         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5440
5441         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5442                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5443                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5444         {
5445             FIXME("Unsupported blit operation falling back to software\n");
5446             return WINED3DERR_INVALIDCALL;
5447         }
5448
5449         /* Color keying: Check if we have to do a color keyed blt,
5450          * and if not check if a color key is activated.
5451          *
5452          * Just modify the color keying parameters in the surface and restore them afterwards
5453          * The surface keeps track of the color key last used to load the opengl surface.
5454          * PreLoad will catch the change to the flags and color key and reload if necessary.
5455          */
5456         if (flags & WINEDDBLT_KEYSRC)
5457         {
5458             /* Use color key from surface */
5459         }
5460         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5461         {
5462             /* Use color key from DDBltFx */
5463             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5464             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5465         }
5466         else
5467         {
5468             /* Do not use color key */
5469             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5470         }
5471
5472         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5473                 src_surface, src_rect, dst_surface, dst_rect);
5474
5475         /* Restore the color key parameters */
5476         src_surface->CKeyFlags = oldCKeyFlags;
5477         src_surface->src_blt_color_key = old_blt_key;
5478
5479         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5480
5481         return WINED3D_OK;
5482     }
5483
5484     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5485     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5486     return WINED3DERR_INVALIDCALL;
5487 }
5488
5489 /* GL locking is done by the caller */
     /*
      * Draw a quad textured with `texture` using the shader backend's depth
      * blt setup. Color writes are masked off, depth writes are enabled and
      * the depth test is set to GL_ALWAYS for the duration of the draw.
      *
      * surface: provides the device, pow2Width / pow2Height (for the texture
      *          coordinates) and ds_current_size (passed to the shader backend).
      * context: GL context; its texture unit 0 is made active.
      * texture: GL texture name that is bound while drawing.
      * x, y, w, h: viewport passed to glViewport().
      * target:  texture target handed to surface_get_blt_info().
      *
      * State covered by the glPushAttrib() mask is restored by glPopAttrib();
      * the previous texture binding and compare mode are restored explicitly.
      */
5490 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5491         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5492 {
5493     struct wined3d_device *device = surface->resource.device;
5494     const struct wined3d_gl_info *gl_info = context->gl_info;
5495     GLint compare_mode = GL_NONE;
5496     struct blt_info info;
5497     GLint old_binding = 0;
5498     RECT rect;
5499
         /* Save the enable bits, depth / color buffer state and the viewport
          * that are modified below. */
5500     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5501
         /* Switch off every per-fragment test except depth, make the depth
          * test always pass, and write depth only (no color). */
5502     glDisable(GL_CULL_FACE);
5503     glDisable(GL_BLEND);
5504     glDisable(GL_ALPHA_TEST);
5505     glDisable(GL_SCISSOR_TEST);
5506     glDisable(GL_STENCIL_TEST);
5507     glEnable(GL_DEPTH_TEST);
5508     glDepthFunc(GL_ALWAYS);
5509     glDepthMask(GL_TRUE);
5510     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5511     glViewport(x, y, w, h);
5512
         /* Note that top (h) and bottom (0) are swapped in this rectangle;
          * presumably this makes surface_get_blt_info() produce vertically
          * flipped texture coordinates - confirm against that helper. */
5513     SetRect(&rect, 0, h, w, 0);
5514     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5515     context_active_texture(context, context->gl_info, 0);
         /* Remember the current binding so it can be restored after the draw. */
5516     glGetIntegerv(info.binding, &old_binding);
5517     glBindTexture(info.bind_target, texture);
5518     if (gl_info->supported[ARB_SHADOW])
5519     {
             /* Temporarily set the texture compare mode to GL_NONE, keeping
              * the previous mode in compare_mode for the restore below. */
5520         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5521         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5522     }
5523
5524     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5525             gl_info, info.tex_type, &surface->ds_current_size);
5526
         /* Four vertices spanning [-1, 1] on both axes, paired with the four
          * texture coordinates computed above. */
5527     glBegin(GL_TRIANGLE_STRIP);
5528     glTexCoord3fv(info.coords[0]);
5529     glVertex2f(-1.0f, -1.0f);
5530     glTexCoord3fv(info.coords[1]);
5531     glVertex2f(1.0f, -1.0f);
5532     glTexCoord3fv(info.coords[2]);
5533     glVertex2f(-1.0f, 1.0f);
5534     glTexCoord3fv(info.coords[3]);
5535     glVertex2f(1.0f, 1.0f);
5536     glEnd();
5537
         /* Undo the texture state changes made above. */
5538     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5539     glBindTexture(info.bind_target, old_binding);
5540
5541     glPopAttrib();
5542
5543     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5544 }
5545
5546 /* Make `location` the only flagged depth/stencil location and record w x h as the current size. */
5547 void surface_modify_ds_location(struct wined3d_surface *surface, DWORD location, UINT w, UINT h)
5548 {
5549     const DWORD unknown_bits = location & ~SFLAG_DS_LOCATIONS;
5550
5551     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5552     if (unknown_bits)
5553         FIXME("Invalid location (%#x) specified.\n", location);
5554
5555     surface->ds_current_size.cx = w;
5556     surface->ds_current_size.cy = h;
5557     surface->flags = (surface->flags & ~SFLAG_DS_LOCATIONS) | location;
5558 }
5559
5560 /* Context activation is done by the caller. */
     /*
      * Bring the depth/stencil location `location` of `surface` up to date.
      *
      * SFLAG_DS_OFFSCREEN copies the onscreen depth buffer into the surface's
      * depth texture; SFLAG_DS_ONSCREEN copies the depth texture into the
      * onscreen depth buffer. Any other value only logs an ERR. Whenever the
      * end of the function is reached, `location` is added to surface->flags
      * and ds_current_size is set to the full resource size.
      *
      * Returns without doing anything unless the offscreen rendering mode is
      * ORM_FBO.
      */
5561 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5562 {
5563     struct wined3d_device *device = surface->resource.device;
5564     GLsizei w, h;
5565
5566     TRACE("surface %p, new location %#x.\n", surface, location);
5567
5568     /* TODO: Make this work for modes other than FBO */
5569     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5570
         /* Pick the area to copy. If `location` is not flagged yet, copy the
          * area recorded in ds_current_size and zero that size, so the "already
          * up to date" test below only matches a zero-sized resource. If it is
          * flagged, use the full resource size; the test below then returns
          * early when ds_current_size already covers the full resource. */
5571     if (!(surface->flags & location))
5572     {
5573         w = surface->ds_current_size.cx;
5574         h = surface->ds_current_size.cy;
5575         surface->ds_current_size.cx = 0;
5576         surface->ds_current_size.cy = 0;
5577     }
5578     else
5579     {
5580         w = surface->resource.width;
5581         h = surface->resource.height;
5582     }
5583
5584     if (surface->ds_current_size.cx == surface->resource.width
5585             && surface->ds_current_size.cy == surface->resource.height)
5586     {
5587         TRACE("Location (%#x) is already up to date.\n", location);
5588         return;
5589     }
5590
5591     if (surface->current_renderbuffer)
5592     {
5593         FIXME("Not supported with fixed up depth stencil.\n");
5594         return;
5595     }
5596
5597     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5598     {
5599         /* This mostly happens when a depth / stencil is used without being
5600          * cleared first. In principle we could upload from sysmem, or
5601          * explicitly clear before first usage. For the moment there don't
5602          * appear to be a lot of applications depending on this, so a FIXME
5603          * should do. */
5604         FIXME("No up to date depth stencil location.\n");
5605         surface->flags |= location;
5606         surface->ds_current_size.cx = surface->resource.width;
5607         surface->ds_current_size.cy = surface->resource.height;
5608         return;
5609     }
5610
5611     if (location == SFLAG_DS_OFFSCREEN)
5612     {
5613         GLint old_binding = 0;
5614         GLenum bind_target;
5615
5616         /* The render target is allowed to be smaller than the depth/stencil
5617          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5618          * than the offscreen surface. Don't overwrite the offscreen surface
5619          * with undefined data. */
5620         w = min(w, context->swapchain->desc.backbuffer_width);
5621         h = min(h, context->swapchain->desc.backbuffer_height);
5622
5623         TRACE("Copying onscreen depth buffer to depth texture.\n");
5624
5625         ENTER_GL();
5626
             /* The scratch texture is created on first use and kept on the
              * device. */
5627         if (!device->depth_blt_texture)
5628         {
5629             glGenTextures(1, &device->depth_blt_texture);
5630         }
5631
5632         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5633          * directly on the FBO texture. That's because we need to flip. */
5634         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5635                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
             /* Save the binding of whichever target the scratch texture is
              * about to occupy, so it can be restored after the copy. */
5636         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5637         {
5638             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5639             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5640         }
5641         else
5642         {
5643             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5644             bind_target = GL_TEXTURE_2D;
5645         }
5646         glBindTexture(bind_target, device->depth_blt_texture);
5647         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5648          * internal format, because the internal format might include stencil
5649          * data. In principle we should copy stencil data as well, but unless
5650          * the driver supports stencil export it's hard to do, and doesn't
5651          * seem to be needed in practice. If the hardware doesn't support
5652          * writing stencil data, the glCopyTexImage2D() call might trigger
5653          * software fallbacks. */
5654         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5655         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5656         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5657         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5658         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5659         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5660         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5661         glBindTexture(bind_target, old_binding);
5662
             /* Second FBO setup, this time passing `surface` with
              * SFLAG_INTEXTURE (presumably as the depth/stencil attachment -
              * confirm against context_apply_fbo_state_blit()), with no draw
              * or read buffer selected. */
5663         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5664                 NULL, surface, SFLAG_INTEXTURE);
5665         context_set_draw_buffer(context, GL_NONE);
5666         glReadBuffer(GL_NONE);
5667
5668         /* Do the actual blit */
5669         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5670         checkGLcall("depth_blt");
5671
5672         context_invalidate_state(context, STATE_FRAMEBUFFER);
5673
5674         LEAVE_GL();
5675
5676         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5677     }
5678     else if (location == SFLAG_DS_ONSCREEN)
5679     {
5680         TRACE("Copying depth texture to onscreen depth buffer.\n");
5681
5682         ENTER_GL();
5683
5684         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5685                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
             /* The viewport's y origin is pow2Height - h rather than 0. */
5686         surface_depth_blt(surface, context, surface->texture_name,
5687                 0, surface->pow2Height - h, w, h, surface->texture_target);
5688         checkGLcall("depth_blt");
5689
5690         context_invalidate_state(context, STATE_FRAMEBUFFER);
5691
5692         LEAVE_GL();
5693
5694         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5695     }
5696     else
5697     {
5698         ERR("Invalid location (%#x) specified.\n", location);
5699     }
5700
         /* Reached for all three branches above, including the ERR one. */
5701     surface->flags |= location;
5702     surface->ds_current_size.cx = surface->resource.width;
5703     surface->ds_current_size.cy = surface->resource.height;
5704 }
5705
     /*
      * Update the set of valid locations stored in surface->flags.
      *
      * persistent != 0: `location` replaces all SFLAG_LOCATIONS bits, i.e. it
      *                  becomes the only valid location set.
      * persistent == 0: the `location` bits are cleared (invalidated).
      *
      * When EXT_TEXTURE_SRGB_DECODE is supported, SFLAG_INTEXTURE and
      * SFLAG_INSRGBTEX are always handled as a pair. If a texture location
      * stops being valid and the surface's container is a texture, that
      * texture is marked dirty. An ERR is logged if no location remains.
      */
5706 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5707 {
5708     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5709     struct wined3d_surface *overlay;
5710
5711     TRACE("surface %p, location %s, persistent %#x.\n",
5712             surface, debug_surflocation(location), persistent);
5713
         /* This check runs for both values of `persistent`. */
5714     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5715             && (location & SFLAG_INDRAWABLE))
5716         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5717
         /* Widen either texture flag to both when sRGB decode is available. */
5718     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5719             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5720         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5721
5722     if (persistent)
5723     {
             /* A texture location that is valid now but absent from `location`
              * is about to be dropped; tell the containing texture. This must
              * be tested before the flags are rewritten below. */
5724         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5725                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5726         {
5727             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5728             {
5729                 TRACE("Passing to container.\n");
5730                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5731             }
5732         }
5733         surface->flags &= ~SFLAG_LOCATIONS;
5734         surface->flags |= location;
5735
5736         /* Redraw emulated overlays, if any */
5737         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5738         {
5739             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5740             {
5741                 surface_draw_overlay(overlay);
5742             }
5743         }
5744     }
5745     else
5746     {
             /* Invalidating a texture location while one is currently valid
              * also dirties the containing texture. */
5747         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5748         {
5749             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5750             {
5751                 TRACE("Passing to container\n");
5752                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5753             }
5754         }
5755         surface->flags &= ~location;
5756     }
5757
5758     if (!(surface->flags & SFLAG_LOCATIONS))
5759     {
5760         ERR("Surface %p does not have any up to date location.\n", surface);
5761     }
5762 }
5763
5764 /* Translate one surface location flag into the WINED3D_RESOURCE_ACCESS_* value it maps to. */
5765 static DWORD resource_access_from_location(DWORD location)
5766 {
5767     /* System memory is the only location that maps to CPU access. */
5768     if (location == SFLAG_INSYSMEM)
5769         return WINED3D_RESOURCE_ACCESS_CPU;
5770
5771     /* Drawable, both texture variants and both renderbuffers map to GPU access. */
5772     if (location == SFLAG_INDRAWABLE
5773             || location == SFLAG_INSRGBTEX
5774             || location == SFLAG_INTEXTURE
5775             || location == SFLAG_INRB_MULTISAMPLE
5776             || location == SFLAG_INRB_RESOLVED)
5777         return WINED3D_RESOURCE_ACCESS_GPU;
5778
5779     /* Any other value, including a combination of flags, is reported and yields 0. */
5780     FIXME("Unhandled location %#x.\n", location);
5781     return 0;
5782 }
5783
     /*
      * Fill the surface's system memory copy from another location.
      *
      * Sources are tried in this order: a texture (RGB or sRGB) via
      * surface_download_data(), then the drawable via read_from_framebuffer().
      * A surface that lives in a renderbuffer is first loaded into
      * SFLAG_INTEXTURE so the texture path can be used. If none of these
      * applies only a FIXME is logged.
      *
      * rect is only used by the drawable read-back path.
      */
5784 static void surface_load_sysmem(struct wined3d_surface *surface,
5785         const struct wined3d_gl_info *gl_info, const RECT *rect)
5786 {
5787     surface_prepare_system_memory(surface);
5788
5789     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5790         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5791
5792     /* Download the surface to system memory. */
5793     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5794     {
5795         struct wined3d_device *device = surface->resource.device;
5796         struct wined3d_context *context;
5797
5798         /* TODO: Use already acquired context when possible. */
5799         context = context_acquire(device, NULL);
5800
             /* The last argument is TRUE only when SFLAG_INTEXTURE is not
              * set, i.e. when the sRGB texture is the one holding the data. */
5801         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5802         surface_download_data(surface, gl_info);
5803
5804         context_release(context);
5805
5806         return;
5807     }
5808
5809     if (surface->flags & SFLAG_INDRAWABLE)
5810     {
5811         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5812                 wined3d_surface_get_pitch(surface));
5813         return;
5814     }
5815
5816     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5817             surface, surface->flags & SFLAG_LOCATIONS);
5818 }
5819
     /*
      * Load the surface contents into the drawable (SFLAG_INDRAWABLE).
      *
      * If SFLAG_INTEXTURE is valid the texture is blitted to the drawable with
      * surface_blt_to_drawable(). Otherwise the system memory copy is sent
      * through flush_to_framebuffer_drawpixels(), after an optional format
      * conversion into a temporary heap buffer.
      *
      * Returns WINED3DERR_INVALIDCALL for an offscreen surface in ORM_FBO mode,
      * E_OUTOFMEMORY if the conversion buffer cannot be allocated, and
      * WINED3D_OK otherwise.
      */
5820 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5821         const struct wined3d_gl_info *gl_info, const RECT *rect)
5822 {
5823     struct wined3d_device *device = surface->resource.device;
5824     struct wined3d_format format;
5825     CONVERT_TYPES convert;
5826     UINT byte_count;
5827     BYTE *mem;
5828
5829     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5830     {
5831         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5832         return WINED3DERR_INVALIDCALL;
5833     }
5834
         /* With RTL_READTEX the texture is loaded first, so the blit path
          * below is taken when that load leaves SFLAG_INTEXTURE set. */
5835     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5836         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5837
5838     if (surface->flags & SFLAG_INTEXTURE)
5839     {
5840         RECT r;
5841
5842         surface_get_rect(surface, rect, &r);
5843         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5844
5845         return WINED3D_OK;
5846     }
5847
         /* Only the sRGB texture is valid. */
5848     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5849     {
5850         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5851          * path through sysmem. */
5852         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5853     }
5854
5855     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5856
5857     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5858      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5859      * called. */
5860     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5861     {
5862         struct wined3d_context *context;
5863
5864         TRACE("Removing the pbo attached to surface %p.\n", surface);
5865
5866         /* TODO: Use already acquired context when possible. */
5867         context = context_acquire(device, NULL);
5868
5869         surface_remove_pbo(surface, gl_info);
5870
5871         context_release(context);
5872     }
5873
5874     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5875     {
5876         UINT height = surface->resource.height;
5877         UINT width = surface->resource.width;
5878         UINT src_pitch, dst_pitch;
5879
5880         byte_count = format.conv_byte_count;
5881         src_pitch = wined3d_surface_get_pitch(surface);
5882
5883         /* Stick to the alignment for the converted surface too, makes it
5884          * easier to load the surface. */
5885         dst_pitch = width * byte_count;
             /* Round up to a multiple of surface_alignment; the mask form
              * assumes the alignment is a power of two. */
5886         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5887
5888         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5889         {
5890             ERR("Out of memory (%u).\n", dst_pitch * height);
5891             return E_OUTOFMEMORY;
5892         }
5893
5894         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5895                 src_pitch, width, height, dst_pitch, convert, surface);
5896
5897         surface->flags |= SFLAG_CONVERTED;
5898     }
5899     else
5900     {
             /* No conversion: draw straight from the surface's own memory. */
5901         surface->flags &= ~SFLAG_CONVERTED;
5902         mem = surface->resource.allocatedMemory;
5903         byte_count = format.byte_count;
5904     }
5905
5906     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5907
5908     /* Don't delete PBO memory. */
         /* mem differs from allocatedMemory only when the conversion branch
          * above allocated a temporary buffer. */
5909     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5910         HeapFree(GetProcessHeap(), 0, mem);
5911
5912     return WINED3D_OK;
5913 }
5914
     /*
      * Load the surface into its GL texture: the sRGB texture when `srgb` is
      * TRUE, the regular texture otherwise.
      *
      * Paths are tried in this order:
      *   1. non-FBO mode, offscreen surface with valid drawable data:
      *      surface_load_fb_texture();
      *   2. the other texture variant is valid and an FBO blit is possible:
      *      surface_blt_fbo() between the RGB and sRGB textures;
      *   3. a renderbuffer location is valid and an FBO blit is possible:
      *      surface_blt_fbo() from the renderbuffer;
      *   4. upload from system memory, converting into a temporary heap
      *      buffer when the format or color keying requires it.
      *
      * Returns WINED3D_OK, or E_OUTOFMEMORY if a conversion buffer cannot be
      * allocated.
      */
5915 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5916         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5917 {
5918     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5919     struct wined3d_device *device = surface->resource.device;
5920     struct wined3d_context *context;
5921     UINT width, src_pitch, dst_pitch;
5922     struct wined3d_bo_address data;
5923     struct wined3d_format format;
5924     POINT dst_point = {0, 0};
5925     CONVERT_TYPES convert;
5926     BYTE *mem;
5927
5928     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5929             && surface_is_offscreen(surface)
5930             && (surface->flags & SFLAG_INDRAWABLE))
5931     {
5932         surface_load_fb_texture(surface, srgb);
5933
5934         return WINED3D_OK;
5935     }
5936
5937     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5938             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
5939             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5940                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5941                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5942     {
             /* Blit from the texture variant that was not requested into the
              * one that was. */
5943         if (srgb)
5944             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5945                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5946         else
5947             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5948                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5949
5950         return WINED3D_OK;
5951     }
5952
5953     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
5954             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
5955             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5956                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5957                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5958     {
             /* The resolved renderbuffer is preferred over the multisampled
              * one when both are valid. */
5959         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
5960         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
             /* NOTE(review): this local shadows the `rect` parameter and has
              * the same value as src_rect above. */
5961         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
5962
5963         surface_blt_fbo(device, WINED3DTEXF_POINT, surface, src_location,
5964                 &rect, surface, dst_location, &rect);
5965
5966         return WINED3D_OK;
5967     }
5968
5969     /* Upload from system memory */
5970
5971     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
5972             TRUE /* We will use textures */, &format, &convert);
5973
         /* If the only copy sits in the other texture variant (no sysmem
          * copy), download it to system memory first. */
5974     if (srgb)
5975     {
5976         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
5977         {
5978             /* Performance warning... */
5979             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
5980             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5981         }
5982     }
5983     else
5984     {
5985         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
5986         {
5987             /* Performance warning... */
5988             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
5989             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5990         }
5991     }
5992
5993     if (!(surface->flags & SFLAG_INSYSMEM))
5994     {
5995         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
5996         /* Lets hope we get it from somewhere... */
5997         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5998     }
5999
6000     /* TODO: Use already acquired context when possible. */
6001     context = context_acquire(device, NULL);
6002
6003     surface_prepare_texture(surface, context, srgb);
6004     surface_bind_and_dirtify(surface, context, srgb);
6005
         /* Record the color key this upload is made with. */
6006     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6007     {
6008         surface->flags |= SFLAG_GLCKEY;
6009         surface->gl_color_key = surface->src_blt_color_key;
6010     }
6011     else surface->flags &= ~SFLAG_GLCKEY;
6012
6013     width = surface->resource.width;
6014     src_pitch = wined3d_surface_get_pitch(surface);
6015
6016     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6017      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6018      * called. */
6019     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6020     {
6021         TRACE("Removing the pbo attached to surface %p.\n", surface);
6022         surface_remove_pbo(surface, gl_info);
6023     }
6024
6025     if (format.convert)
6026     {
6027         /* This code is entered for texture formats which need a fixup. */
6028         UINT height = surface->resource.height;
6029
6030         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6031         dst_pitch = width * format.conv_byte_count;
6032         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6033
6034         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6035         {
6036             ERR("Out of memory (%u).\n", dst_pitch * height);
6037             context_release(context);
6038             return E_OUTOFMEMORY;
6039         }
6040         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
             /* From here on `format` and src_pitch describe the converted
              * buffer rather than the surface's own memory. */
6041         format.byte_count = format.conv_byte_count;
6042         src_pitch = dst_pitch;
6043     }
6044     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6045     {
6046         /* This code is only entered for color keying fixups */
6047         UINT height = surface->resource.height;
6048
6049         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6050         dst_pitch = width * format.conv_byte_count;
6051         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6052
6053         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6054         {
6055             ERR("Out of memory (%u).\n", dst_pitch * height);
6056             context_release(context);
6057             return E_OUTOFMEMORY;
6058         }
6059         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6060                 width, height, dst_pitch, convert, surface);
6061         format.byte_count = format.conv_byte_count;
6062         src_pitch = dst_pitch;
6063     }
6064     else
6065     {
6066         mem = surface->resource.allocatedMemory;
6067     }
6068
         /* Upload either from the PBO (when the surface still has one) or
          * from the client memory pointer chosen above. */
6069     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6070     data.addr = mem;
6071     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6072
6073     context_release(context);
6074
6075     /* Don't delete PBO memory. */
6076     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6077         HeapFree(GetProcessHeap(), 0, mem);
6078
6079     return WINED3D_OK;
6080 }
6081
6082 /* Run surface_blt_fbo() over the full surface from SFLAG_INRB_MULTISAMPLE to SFLAG_INRB_RESOLVED. */
6083 static void surface_multisample_resolve(struct wined3d_surface *surface)
6084 {
6085     struct wined3d_device *device = surface->resource.device;
6086     RECT whole = {0, 0, surface->resource.width, surface->resource.height};
6087     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6088         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6089     surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INRB_MULTISAMPLE, &whole,
6090             surface, SFLAG_INRB_RESOLVED, &whole);
6091 }
6092
     /*
      * Make `location` a valid location of `surface` by loading the data from
      * whichever location is currently valid.
      *
      * surface:  surface to load.
      * location: a single SFLAG_IN* location flag.
      * rect:     optional sub-rectangle passed on to the loaders. When it is
      *           non-NULL, `location` is NOT added to surface->flags
      *           afterwards.
      *
      * Returns WINED3D_OK on success or if the location was already valid,
      * WINED3DERR_INVALIDCALL for an unimplemented depth/stencil location,
      * WINED3DERR_DEVICELOST (and sets SFLAG_LOST) if the surface has no valid
      * location at all, or the failure code of the drawable / texture loader.
      */
6093 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6094 {
6095     struct wined3d_device *device = surface->resource.device;
6096     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6097     HRESULT hr;
6098
6099     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6100
         /* Depth/stencil surfaces use their own location tracking; only
          * SFLAG_INTEXTURE is handled, mapped to SFLAG_DS_OFFSCREEN. */
6101     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6102     {
6103         if (location == SFLAG_INTEXTURE)
6104         {
6105             struct wined3d_context *context = context_acquire(device, NULL);
6106             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6107             context_release(context);
6108             return WINED3D_OK;
6109         }
6110         else
6111         {
6112             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6113             return WINED3DERR_INVALIDCALL;
6114         }
6115     }
6116
         /* With sRGB decode support a request for the sRGB texture is served
          * by the regular texture. */
6117     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6118         location = SFLAG_INTEXTURE;
6119
6120     if (surface->flags & location)
6121     {
6122         TRACE("Location already up to date.\n");
6123
6124         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6125                 && surface_need_pbo(surface, gl_info))
6126             surface_load_pbo(surface, gl_info);
6127
6128         return WINED3D_OK;
6129     }
6130
         /* Diagnostic only; the access check is skipped unless WARN output
          * is enabled for this channel. */
6131     if (WARN_ON(d3d_surface))
6132     {
6133         DWORD required_access = resource_access_from_location(location);
6134         if ((surface->resource.access_flags & required_access) != required_access)
6135             WARN("Operation requires %#x access, but surface only has %#x.\n",
6136                     required_access, surface->resource.access_flags);
6137     }
6138
6139     if (!(surface->flags & SFLAG_LOCATIONS))
6140     {
6141         ERR("Surface %p does not have any up to date location.\n", surface);
6142         surface->flags |= SFLAG_LOST;
6143         return WINED3DERR_DEVICELOST;
6144     }
6145
6146     switch (location)
6147     {
6148         case SFLAG_INSYSMEM:
6149             surface_load_sysmem(surface, gl_info, rect);
6150             break;
6151
6152         case SFLAG_INDRAWABLE:
6153             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6154                 return hr;
6155             break;
6156
6157         case SFLAG_INRB_RESOLVED:
6158             surface_multisample_resolve(surface);
6159             break;
6160
6161         case SFLAG_INTEXTURE:
6162         case SFLAG_INSRGBTEX:
6163             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6164                 return hr;
6165             break;
6166
             /* NOTE(review): after the ERR, execution still falls through to
              * the flag update below and WINED3D_OK is returned. */
6167         default:
6168             ERR("Don't know how to handle location %#x.\n", location);
6169             break;
6170     }
6171
         /* Only a full-surface load marks the location as valid; the system
          * memory copy may then be evicted. */
6172     if (!rect)
6173     {
6174         surface->flags |= location;
6175
6176         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6177             surface_evict_sysmem(surface);
6178     }
6179
         /* Keep both texture flags in sync when sRGB decode is available. */
6180     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6181             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6182     {
6183         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6184     }
6185
6186     return WINED3D_OK;
6187 }
6188
6189 /* Return TRUE for surfaces outside a swapchain and for non-front swapchain surfaces
6190  * whose swapchain has render_to_fbo set; FALSE otherwise. */
6191 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6192 {
6193     const struct wined3d_swapchain *chain;
6194
6195     /* Anything that isn't held by a swapchain is offscreen by definition. */
6196     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
6197         return TRUE;
6198
6199     /* The front buffer is onscreen; other swapchain surfaces follow render_to_fbo. */
6200     chain = surface->container.u.swapchain;
6201     return chain->front_buffer == surface ? FALSE : chain->render_to_fbo;
6202 }
6203
     /* No-op: there is nothing to allocate, so this always returns WINED3D_OK.
      * The device argument is unused. */
6204 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
6205 /* Context activation is done by the caller. */
     /* No-op counterpart of ffp_blit_alloc(); the device argument is unused. */
6206 static void ffp_blit_free(struct wined3d_device *device) { }
6207
6208 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6209 /* Context activation is done by the caller. */
     /*
      * Build a 256-entry RGBA table with d3dfmt_p8_init_palette() and upload it
      * with glColorTableEXT() for the surface's texture target. The color-key
      * argument passed to the palette builder is TRUE when the surface has
      * WINEDDSD_CKSRCBLT set in CKeyFlags. Takes the GL lock itself.
      */
6210 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6211 {
6212     BYTE table[256][4];
6213     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6214
6215     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6216
6217     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6218     ENTER_GL();
6219     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6220     LEAVE_GL();
6221 }
6222
6223 /* Context activation is done by the caller. */
     /*
      * Prepare fixed-function state for blitting from `surface`: upload the P8
      * palette when applicable, then glEnable() the surface's texture target.
      * blit_priv is not used. Always returns WINED3D_OK.
      */
6224 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6225 {
6226     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6227
6228     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6229      * else the surface is converted in software at upload time in LoadLocation.
6230      */
6231     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6232             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6233         ffp_blit_p8_upload_palette(surface, context->gl_info);
6234
6235     ENTER_GL();
6236     glEnable(surface->texture_target);
6237     checkGLcall("glEnable(surface->texture_target)");
6238     LEAVE_GL();
6239     return WINED3D_OK;
6240 }
6241
6242 /* Context activation is done by the caller. */
     /*
      * Disable texturing again after a fixed-function blit. GL_TEXTURE_2D is
      * always disabled; the cube map and rectangle targets are disabled when
      * the corresponding extension is supported. The function does not track
      * which target was actually enabled.
      */
6243 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6244 {
6245     ENTER_GL();
6246     glDisable(GL_TEXTURE_2D);
6247     checkGLcall("glDisable(GL_TEXTURE_2D)");
6248     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6249     {
6250         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6251         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6252     }
6253     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6254     {
6255         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6256         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6257     }
6258     LEAVE_GL();
6259 }
6260
6261 /* Decide whether this blitter accepts blit_op for the given pools, usages and formats (rects are unused). */
6262 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6263         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6264         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6265 {
6266     enum complex_fixup fixup;
6267
6268     if (blit_op == WINED3D_BLIT_OP_DEPTH_FILL)
6269         return TRUE;
6270
6271     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6272     {
6273         if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6274             return FALSE;
6275
6276         if (wined3d_settings.offscreen_rendering_mode != ORM_FBO)
6277         {
6278             if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6279             {
6280                 TRACE("Color fill not supported\n");
6281                 return FALSE;
6282             }
6283         }
6284         else if (!(dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE)
6285                 && !(dst_usage & WINED3DUSAGE_RENDERTARGET))
6286             return FALSE;
6287
6288         /* FIXME: We should reject color fills on formats with fixups,
6289          * but this would break P8 color fills for example. */
6290         return TRUE;
6291     }
6292
6293     if (blit_op != WINED3D_BLIT_OP_COLOR_BLIT)
6294     {
6295         TRACE("Unsupported blit_op=%d\n", blit_op);
6296         return FALSE;
6297     }
6298
6299     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6300         return FALSE;
6301
6302     fixup = get_complex_fixup(src_format->color_fixup);
6303     if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6304     {
6305         TRACE("Checking support for fixup:\n");
6306         dump_color_fixup_desc(src_format->color_fixup);
6307     }
6308
6309     if (!is_identity_fixup(dst_format->color_fixup))
6310     {
6311         TRACE("Destination fixups are not supported\n");
6312         return FALSE;
6313     }
6314
6315     if (fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6316     {
6317         TRACE("P8 fixup supported\n");
6318         return TRUE;
6319     }
6320
6321     /* We only support identity conversions. */
6322     if (!is_identity_fixup(src_format->color_fixup))
6323     {
6324         TRACE("[FAILED]\n");
6325         return FALSE;
6326     }
6327     TRACE("[OK]\n");
6328     return TRUE;
6329 }
6330
6331 /* Do not call while under the GL lock. */
6332 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6333         const RECT *dst_rect, const struct wined3d_color *color)
6334 {
6335     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6336     struct wined3d_fb_state fb = {&dst_surface, NULL};
6337
6338     return device_clear_render_targets(device, 1, &fb,
6339             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6340 }
6341
6342 /* Do not call while under the GL lock. */
6343 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6344         struct wined3d_surface *surface, const RECT *rect, float depth)
6345 {
6346     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6347     struct wined3d_fb_state fb = {NULL, surface};
6348
6349     return device_clear_render_targets(device, 0, &fb,
6350             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6351 }
6352
6353 const struct blit_shader ffp_blit =  {
6354     ffp_blit_alloc,
6355     ffp_blit_free,
6356     ffp_blit_set,
6357     ffp_blit_unset,
6358     ffp_blit_supported,
6359     ffp_blit_color_fill,
6360     ffp_blit_depth_fill,
6361 };
6362
6363 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6364 {
6365     return WINED3D_OK;
6366 }
6367
/* Counterpart of cpu_blit_alloc(); intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6372
6373 /* Context activation is done by the caller. */
6374 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6375 {
6376     return WINED3D_OK;
6377 }
6378
/* Counterpart of cpu_blit_set(); intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6383
6384 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6385         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6386         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6387 {
6388     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6389     {
6390         return TRUE;
6391     }
6392
6393     return FALSE;
6394 }
6395
6396 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6397         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6398         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6399 {
6400     UINT row_block_count;
6401     const BYTE *src_row;
6402     BYTE *dst_row;
6403     UINT x, y;
6404
6405     src_row = src_data;
6406     dst_row = dst_data;
6407
6408     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6409
6410     if (!flags)
6411     {
6412         for (y = 0; y < update_h; y += format->block_height)
6413         {
6414             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6415             src_row += src_pitch;
6416             dst_row += dst_pitch;
6417         }
6418
6419         return WINED3D_OK;
6420     }
6421
6422     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6423     {
6424         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6425
6426         switch (format->id)
6427         {
6428             case WINED3DFMT_DXT1:
6429                 for (y = 0; y < update_h; y += format->block_height)
6430                 {
6431                     struct block
6432                     {
6433                         WORD color[2];
6434                         BYTE control_row[4];
6435                     };
6436
6437                     const struct block *s = (const struct block *)src_row;
6438                     struct block *d = (struct block *)dst_row;
6439
6440                     for (x = 0; x < row_block_count; ++x)
6441                     {
6442                         d[x].color[0] = s[x].color[0];
6443                         d[x].color[1] = s[x].color[1];
6444                         d[x].control_row[0] = s[x].control_row[3];
6445                         d[x].control_row[1] = s[x].control_row[2];
6446                         d[x].control_row[2] = s[x].control_row[1];
6447                         d[x].control_row[3] = s[x].control_row[0];
6448                     }
6449                     src_row -= src_pitch;
6450                     dst_row += dst_pitch;
6451                 }
6452                 return WINED3D_OK;
6453
6454             case WINED3DFMT_DXT3:
6455                 for (y = 0; y < update_h; y += format->block_height)
6456                 {
6457                     struct block
6458                     {
6459                         WORD alpha_row[4];
6460                         WORD color[2];
6461                         BYTE control_row[4];
6462                     };
6463
6464                     const struct block *s = (const struct block *)src_row;
6465                     struct block *d = (struct block *)dst_row;
6466
6467                     for (x = 0; x < row_block_count; ++x)
6468                     {
6469                         d[x].alpha_row[0] = s[x].alpha_row[3];
6470                         d[x].alpha_row[1] = s[x].alpha_row[2];
6471                         d[x].alpha_row[2] = s[x].alpha_row[1];
6472                         d[x].alpha_row[3] = s[x].alpha_row[0];
6473                         d[x].color[0] = s[x].color[0];
6474                         d[x].color[1] = s[x].color[1];
6475                         d[x].control_row[0] = s[x].control_row[3];
6476                         d[x].control_row[1] = s[x].control_row[2];
6477                         d[x].control_row[2] = s[x].control_row[1];
6478                         d[x].control_row[3] = s[x].control_row[0];
6479                     }
6480                     src_row -= src_pitch;
6481                     dst_row += dst_pitch;
6482                 }
6483                 return WINED3D_OK;
6484
6485             default:
6486                 FIXME("Compressed flip not implemented for format %s.\n",
6487                         debug_d3dformat(format->id));
6488                 return E_NOTIMPL;
6489         }
6490     }
6491
6492     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6493             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6494
6495     return E_NOTIMPL;
6496 }
6497
6498 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6499         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6500         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6501 {
6502     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6503     const struct wined3d_format *src_format, *dst_format;
6504     struct wined3d_surface *orig_src = src_surface;
6505     struct wined3d_mapped_rect dst_map, src_map;
6506     HRESULT hr = WINED3D_OK;
6507     const BYTE *sbuf;
6508     RECT xdst,xsrc;
6509     BYTE *dbuf;
6510     int x, y;
6511
6512     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6513             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6514             flags, fx, debug_d3dtexturefiltertype(filter));
6515
6516     xsrc = *src_rect;
6517
6518     if (!src_surface)
6519     {
6520         RECT full_rect;
6521
6522         full_rect.left = 0;
6523         full_rect.top = 0;
6524         full_rect.right = dst_surface->resource.width;
6525         full_rect.bottom = dst_surface->resource.height;
6526         IntersectRect(&xdst, &full_rect, dst_rect);
6527     }
6528     else
6529     {
6530         BOOL clip_horiz, clip_vert;
6531
6532         xdst = *dst_rect;
6533         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6534         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6535
6536         if (clip_vert || clip_horiz)
6537         {
6538             /* Now check if this is a special case or not... */
6539             if ((flags & WINEDDBLT_DDFX)
6540                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6541                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6542             {
6543                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6544                 return WINED3D_OK;
6545             }
6546
6547             if (clip_horiz)
6548             {
6549                 if (xdst.left < 0)
6550                 {
6551                     xsrc.left -= xdst.left;
6552                     xdst.left = 0;
6553                 }
6554                 if (xdst.right > dst_surface->resource.width)
6555                 {
6556                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6557                     xdst.right = (int)dst_surface->resource.width;
6558                 }
6559             }
6560
6561             if (clip_vert)
6562             {
6563                 if (xdst.top < 0)
6564                 {
6565                     xsrc.top -= xdst.top;
6566                     xdst.top = 0;
6567                 }
6568                 if (xdst.bottom > dst_surface->resource.height)
6569                 {
6570                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6571                     xdst.bottom = (int)dst_surface->resource.height;
6572                 }
6573             }
6574
6575             /* And check if after clipping something is still to be done... */
6576             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6577                     || (xdst.left >= (int)dst_surface->resource.width)
6578                     || (xdst.top >= (int)dst_surface->resource.height)
6579                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6580                     || (xsrc.left >= (int)src_surface->resource.width)
6581                     || (xsrc.top >= (int)src_surface->resource.height))
6582             {
6583                 TRACE("Nothing to be done after clipping.\n");
6584                 return WINED3D_OK;
6585             }
6586         }
6587     }
6588
6589     if (src_surface == dst_surface)
6590     {
6591         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6592         src_map = dst_map;
6593         src_format = dst_surface->resource.format;
6594         dst_format = src_format;
6595     }
6596     else
6597     {
6598         dst_format = dst_surface->resource.format;
6599         if (src_surface)
6600         {
6601             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6602             {
6603                 src_surface = surface_convert_format(src_surface, dst_format->id);
6604                 if (!src_surface)
6605                 {
6606                     /* The conv function writes a FIXME */
6607                     WARN("Cannot convert source surface format to dest format.\n");
6608                     goto release;
6609                 }
6610             }
6611             wined3d_surface_map(src_surface, &src_map, NULL, WINED3DLOCK_READONLY);
6612             src_format = src_surface->resource.format;
6613         }
6614         else
6615         {
6616             src_format = dst_format;
6617         }
6618         if (dst_rect)
6619             wined3d_surface_map(dst_surface, &dst_map, &xdst, 0);
6620         else
6621             wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6622     }
6623
6624     bpp = dst_surface->resource.format->byte_count;
6625     srcheight = xsrc.bottom - xsrc.top;
6626     srcwidth = xsrc.right - xsrc.left;
6627     dstheight = xdst.bottom - xdst.top;
6628     dstwidth = xdst.right - xdst.left;
6629     width = (xdst.right - xdst.left) * bpp;
6630
6631     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6632     {
6633         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6634
6635         if (src_surface == dst_surface)
6636         {
6637             FIXME("Only plain blits supported on compressed surfaces.\n");
6638             hr = E_NOTIMPL;
6639             goto release;
6640         }
6641
6642         if (srcheight != dstheight || srcwidth != dstwidth)
6643         {
6644             WARN("Stretching not supported on compressed surfaces.\n");
6645             hr = WINED3DERR_INVALIDCALL;
6646             goto release;
6647         }
6648
6649         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6650         {
6651             WARN("Rectangle not block-aligned.\n");
6652             hr = WINED3DERR_INVALIDCALL;
6653             goto release;
6654         }
6655
6656         hr = surface_cpu_blt_compressed(src_map.data, dst_map.data,
6657                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6658                 src_format, flags, fx);
6659         goto release;
6660     }
6661
6662     if (dst_rect && src_surface != dst_surface)
6663         dbuf = dst_map.data;
6664     else
6665         dbuf = (BYTE *)dst_map.data + (xdst.top * dst_map.row_pitch) + (xdst.left * bpp);
6666
6667     /* First, all the 'source-less' blits */
6668     if (flags & WINEDDBLT_COLORFILL)
6669     {
6670         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6671         flags &= ~WINEDDBLT_COLORFILL;
6672     }
6673
6674     if (flags & WINEDDBLT_DEPTHFILL)
6675     {
6676         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6677     }
6678     if (flags & WINEDDBLT_ROP)
6679     {
6680         /* Catch some degenerate cases here. */
6681         switch (fx->dwROP)
6682         {
6683             case BLACKNESS:
6684                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6685                 break;
6686             case 0xAA0029: /* No-op */
6687                 break;
6688             case WHITENESS:
6689                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6690                 break;
6691             case SRCCOPY: /* Well, we do that below? */
6692                 break;
6693             default:
6694                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6695                 goto error;
6696         }
6697         flags &= ~WINEDDBLT_ROP;
6698     }
6699     if (flags & WINEDDBLT_DDROPS)
6700     {
6701         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6702     }
6703     /* Now the 'with source' blits. */
6704     if (src_surface)
6705     {
6706         const BYTE *sbase;
6707         int sx, xinc, sy, yinc;
6708
6709         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6710             goto release;
6711
6712         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6713                 && (srcwidth != dstwidth || srcheight != dstheight))
6714         {
6715             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6716             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6717         }
6718
6719         sbase = (BYTE *)src_map.data + (xsrc.top * src_map.row_pitch) + xsrc.left * bpp;
6720         xinc = (srcwidth << 16) / dstwidth;
6721         yinc = (srcheight << 16) / dstheight;
6722
6723         if (!flags)
6724         {
6725             /* No effects, we can cheat here. */
6726             if (dstwidth == srcwidth)
6727             {
6728                 if (dstheight == srcheight)
6729                 {
6730                     /* No stretching in either direction. This needs to be as
6731                      * fast as possible. */
6732                     sbuf = sbase;
6733
6734                     /* Check for overlapping surfaces. */
6735                     if (src_surface != dst_surface || xdst.top < xsrc.top
6736                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6737                     {
6738                         /* No overlap, or dst above src, so copy from top downwards. */
6739                         for (y = 0; y < dstheight; ++y)
6740                         {
6741                             memcpy(dbuf, sbuf, width);
6742                             sbuf += src_map.row_pitch;
6743                             dbuf += dst_map.row_pitch;
6744                         }
6745                     }
6746                     else if (xdst.top > xsrc.top)
6747                     {
6748                         /* Copy from bottom upwards. */
6749                         sbuf += src_map.row_pitch * dstheight;
6750                         dbuf += dst_map.row_pitch * dstheight;
6751                         for (y = 0; y < dstheight; ++y)
6752                         {
6753                             sbuf -= src_map.row_pitch;
6754                             dbuf -= dst_map.row_pitch;
6755                             memcpy(dbuf, sbuf, width);
6756                         }
6757                     }
6758                     else
6759                     {
6760                         /* Src and dst overlapping on the same line, use memmove. */
6761                         for (y = 0; y < dstheight; ++y)
6762                         {
6763                             memmove(dbuf, sbuf, width);
6764                             sbuf += src_map.row_pitch;
6765                             dbuf += dst_map.row_pitch;
6766                         }
6767                     }
6768                 }
6769                 else
6770                 {
6771                     /* Stretching in y direction only. */
6772                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6773                     {
6774                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6775                         memcpy(dbuf, sbuf, width);
6776                         dbuf += dst_map.row_pitch;
6777                     }
6778                 }
6779             }
6780             else
6781             {
6782                 /* Stretching in X direction. */
6783                 int last_sy = -1;
6784                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6785                 {
6786                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6787
6788                     if ((sy >> 16) == (last_sy >> 16))
6789                     {
6790                         /* This source row is the same as last source row -
6791                          * Copy the already stretched row. */
6792                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6793                     }
6794                     else
6795                     {
6796 #define STRETCH_ROW(type) \
6797 do { \
6798     const type *s = (const type *)sbuf; \
6799     type *d = (type *)dbuf; \
6800     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6801         d[x] = s[sx >> 16]; \
6802 } while(0)
6803
6804                         switch(bpp)
6805                         {
6806                             case 1:
6807                                 STRETCH_ROW(BYTE);
6808                                 break;
6809                             case 2:
6810                                 STRETCH_ROW(WORD);
6811                                 break;
6812                             case 4:
6813                                 STRETCH_ROW(DWORD);
6814                                 break;
6815                             case 3:
6816                             {
6817                                 const BYTE *s;
6818                                 BYTE *d = dbuf;
6819                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6820                                 {
6821                                     DWORD pixel;
6822
6823                                     s = sbuf + 3 * (sx >> 16);
6824                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6825                                     d[0] = (pixel      ) & 0xff;
6826                                     d[1] = (pixel >>  8) & 0xff;
6827                                     d[2] = (pixel >> 16) & 0xff;
6828                                     d += 3;
6829                                 }
6830                                 break;
6831                             }
6832                             default:
6833                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6834                                 hr = WINED3DERR_NOTAVAILABLE;
6835                                 goto error;
6836                         }
6837 #undef STRETCH_ROW
6838                     }
6839                     dbuf += dst_map.row_pitch;
6840                     last_sy = sy;
6841                 }
6842             }
6843         }
6844         else
6845         {
6846             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6847             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6848             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6849             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6850             {
6851                 /* The color keying flags are checked for correctness in ddraw */
6852                 if (flags & WINEDDBLT_KEYSRC)
6853                 {
6854                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6855                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6856                 }
6857                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6858                 {
6859                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6860                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6861                 }
6862
6863                 if (flags & WINEDDBLT_KEYDEST)
6864                 {
6865                     /* Destination color keys are taken from the source surface! */
6866                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6867                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6868                 }
6869                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6870                 {
6871                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6872                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6873                 }
6874
6875                 if (bpp == 1)
6876                 {
6877                     keymask = 0xff;
6878                 }
6879                 else
6880                 {
6881                     keymask = src_format->red_mask
6882                             | src_format->green_mask
6883                             | src_format->blue_mask;
6884                 }
6885                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6886             }
6887
6888             if (flags & WINEDDBLT_DDFX)
6889             {
6890                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6891                 LONG tmpxy;
6892                 dTopLeft     = dbuf;
6893                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6894                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6895                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6896
6897                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6898                 {
6899                     /* I don't think we need to do anything about this flag */
6900                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6901                 }
6902                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6903                 {
6904                     tmp          = dTopRight;
6905                     dTopRight    = dTopLeft;
6906                     dTopLeft     = tmp;
6907                     tmp          = dBottomRight;
6908                     dBottomRight = dBottomLeft;
6909                     dBottomLeft  = tmp;
6910                     dstxinc = dstxinc * -1;
6911                 }
6912                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6913                 {
6914                     tmp          = dTopLeft;
6915                     dTopLeft     = dBottomLeft;
6916                     dBottomLeft  = tmp;
6917                     tmp          = dTopRight;
6918                     dTopRight    = dBottomRight;
6919                     dBottomRight = tmp;
6920                     dstyinc = dstyinc * -1;
6921                 }
6922                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6923                 {
6924                     /* I don't think we need to do anything about this flag */
6925                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6926                 }
6927                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6928                 {
6929                     tmp          = dBottomRight;
6930                     dBottomRight = dTopLeft;
6931                     dTopLeft     = tmp;
6932                     tmp          = dBottomLeft;
6933                     dBottomLeft  = dTopRight;
6934                     dTopRight    = tmp;
6935                     dstxinc = dstxinc * -1;
6936                     dstyinc = dstyinc * -1;
6937                 }
6938                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6939                 {
6940                     tmp          = dTopLeft;
6941                     dTopLeft     = dBottomLeft;
6942                     dBottomLeft  = dBottomRight;
6943                     dBottomRight = dTopRight;
6944                     dTopRight    = tmp;
6945                     tmpxy   = dstxinc;
6946                     dstxinc = dstyinc;
6947                     dstyinc = tmpxy;
6948                     dstxinc = dstxinc * -1;
6949                 }
6950                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6951                 {
6952                     tmp          = dTopLeft;
6953                     dTopLeft     = dTopRight;
6954                     dTopRight    = dBottomRight;
6955                     dBottomRight = dBottomLeft;
6956                     dBottomLeft  = tmp;
6957                     tmpxy   = dstxinc;
6958                     dstxinc = dstyinc;
6959                     dstyinc = tmpxy;
6960                     dstyinc = dstyinc * -1;
6961                 }
6962                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6963                 {
6964                     /* I don't think we need to do anything about this flag */
6965                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6966                 }
6967                 dbuf = dTopLeft;
6968                 flags &= ~(WINEDDBLT_DDFX);
6969             }
6970
6971 #define COPY_COLORKEY_FX(type) \
6972 do { \
6973     const type *s; \
6974     type *d = (type *)dbuf, *dx, tmp; \
6975     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6976     { \
6977         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
6978         dx = d; \
6979         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6980         { \
6981             tmp = s[sx >> 16]; \
6982             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6983                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6984             { \
6985                 dx[0] = tmp; \
6986             } \
6987             dx = (type *)(((BYTE *)dx) + dstxinc); \
6988         } \
6989         d = (type *)(((BYTE *)d) + dstyinc); \
6990     } \
6991 } while(0)
6992
6993             switch (bpp)
6994             {
6995                 case 1:
6996                     COPY_COLORKEY_FX(BYTE);
6997                     break;
6998                 case 2:
6999                     COPY_COLORKEY_FX(WORD);
7000                     break;
7001                 case 4:
7002                     COPY_COLORKEY_FX(DWORD);
7003                     break;
7004                 case 3:
7005                 {
7006                     const BYTE *s;
7007                     BYTE *d = dbuf, *dx;
7008                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7009                     {
7010                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7011                         dx = d;
7012                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7013                         {
7014                             DWORD pixel, dpixel = 0;
7015                             s = sbuf + 3 * (sx>>16);
7016                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7017                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7018                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7019                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7020                             {
7021                                 dx[0] = (pixel      ) & 0xff;
7022                                 dx[1] = (pixel >>  8) & 0xff;
7023                                 dx[2] = (pixel >> 16) & 0xff;
7024                             }
7025                             dx += dstxinc;
7026                         }
7027                         d += dstyinc;
7028                     }
7029                     break;
7030                 }
7031                 default:
7032                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7033                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7034                     hr = WINED3DERR_NOTAVAILABLE;
7035                     goto error;
7036 #undef COPY_COLORKEY_FX
7037             }
7038         }
7039     }
7040
7041 error:
7042     if (flags && FIXME_ON(d3d_surface))
7043     {
7044         FIXME("\tUnsupported flags: %#x.\n", flags);
7045     }
7046
7047 release:
7048     wined3d_surface_unmap(dst_surface);
7049     if (src_surface && src_surface != dst_surface)
7050         wined3d_surface_unmap(src_surface);
7051     /* Release the converted surface, if any. */
7052     if (src_surface && src_surface != orig_src)
7053         wined3d_surface_decref(src_surface);
7054
7055     return hr;
7056 }
7057
7058 /* Do not call while under the GL lock. */
7059 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7060         const RECT *dst_rect, const struct wined3d_color *color)
7061 {
7062     static const RECT src_rect;
7063     WINEDDBLTFX BltFx;
7064
7065     memset(&BltFx, 0, sizeof(BltFx));
7066     BltFx.dwSize = sizeof(BltFx);
7067     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7068     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7069             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7070 }
7071
7072 /* Do not call while under the GL lock. */
7073 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7074         struct wined3d_surface *surface, const RECT *rect, float depth)
7075 {
7076     FIXME("Depth filling not implemented by cpu_blit.\n");
7077     return WINED3DERR_INVALIDCALL;
7078 }
7079
7080 const struct blit_shader cpu_blit =  {
7081     cpu_blit_alloc,
7082     cpu_blit_free,
7083     cpu_blit_set,
7084     cpu_blit_unset,
7085     cpu_blit_supported,
7086     cpu_blit_color_fill,
7087     cpu_blit_depth_fill,
7088 };
7089
7090 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7091         UINT width, UINT height, UINT level, WINED3DMULTISAMPLE_TYPE multisample_type,
7092         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7093         WINED3DPOOL pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7094 {
7095     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7096     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7097     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7098     unsigned int resource_size;
7099     HRESULT hr;
7100
7101     if (multisample_quality > 0)
7102     {
7103         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7104         multisample_quality = 0;
7105     }
7106
7107     /* Quick lockable sanity check.
7108      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7109      * this function is too deep to need to care about things like this.
7110      * Levels need to be checked too, since they all affect what can be done. */
7111     switch (pool)
7112     {
7113         case WINED3DPOOL_SCRATCH:
7114             if (!lockable)
7115             {
7116                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7117                         "which are mutually exclusive, setting lockable to TRUE.\n");
7118                 lockable = TRUE;
7119             }
7120             break;
7121
7122         case WINED3DPOOL_SYSTEMMEM:
7123             if (!lockable)
7124                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7125             break;
7126
7127         case WINED3DPOOL_MANAGED:
7128             if (usage & WINED3DUSAGE_DYNAMIC)
7129                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7130             break;
7131
7132         case WINED3DPOOL_DEFAULT:
7133             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7134                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7135             break;
7136
7137         default:
7138             FIXME("Unknown pool %#x.\n", pool);
7139             break;
7140     };
7141
7142     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7143         FIXME("Trying to create a render target that isn't in the default pool.\n");
7144
7145     /* FIXME: Check that the format is supported by the device. */
7146
7147     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7148     if (!resource_size)
7149         return WINED3DERR_INVALIDCALL;
7150
7151     surface->surface_type = surface_type;
7152
7153     switch (surface_type)
7154     {
7155         case SURFACE_OPENGL:
7156             surface->surface_ops = &surface_ops;
7157             break;
7158
7159         case SURFACE_GDI:
7160             surface->surface_ops = &gdi_surface_ops;
7161             break;
7162
7163         default:
7164             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7165             return WINED3DERR_INVALIDCALL;
7166     }
7167
7168     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7169             multisample_type, multisample_quality, usage, pool, width, height, 1,
7170             resource_size, parent, parent_ops, &surface_resource_ops);
7171     if (FAILED(hr))
7172     {
7173         WARN("Failed to initialize resource, returning %#x.\n", hr);
7174         return hr;
7175     }
7176
7177     /* "Standalone" surface. */
7178     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7179
7180     surface->texture_level = level;
7181     list_init(&surface->overlays);
7182
7183     /* Flags */
7184     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7185     if (flags & WINED3D_SURFACE_DISCARD)
7186         surface->flags |= SFLAG_DISCARD;
7187     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7188         surface->flags |= SFLAG_PIN_SYSMEM;
7189     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7190         surface->flags |= SFLAG_LOCKABLE;
7191     /* I'm not sure if this qualifies as a hack or as an optimization. It
7192      * seems reasonable to assume that lockable render targets will get
7193      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7194      * creation. However, the other reason we want to do this is that several
7195      * ddraw applications access surface memory while the surface isn't
7196      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7197      * future locks prevents these from crashing. */
7198     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7199         surface->flags |= SFLAG_DYNLOCK;
7200
7201     /* Mark the texture as dirty so that it gets loaded first time around. */
7202     surface_add_dirty_rect(surface, NULL);
7203     list_init(&surface->renderbuffers);
7204
7205     TRACE("surface %p, memory %p, size %u\n",
7206             surface, surface->resource.allocatedMemory, surface->resource.size);
7207
7208     /* Call the private setup routine */
7209     hr = surface->surface_ops->surface_private_setup(surface);
7210     if (FAILED(hr))
7211     {
7212         ERR("Private setup failed, returning %#x\n", hr);
7213         surface_cleanup(surface);
7214         return hr;
7215     }
7216
7217     /* Similar to lockable rendertargets above, creating the DIB section
7218      * during surface initialization prevents the sysmem pointer from changing
7219      * after a wined3d_surface_getdc() call. */
7220     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7221             && SUCCEEDED(surface_create_dib_section(surface)))
7222     {
7223         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7224         surface->resource.heapMemory = NULL;
7225         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7226     }
7227
7228     return hr;
7229 }
7230
7231 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7232         enum wined3d_format_id format_id, UINT level, DWORD usage, WINED3DPOOL pool,
7233         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7234         DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7235 {
7236     struct wined3d_surface *object;
7237     HRESULT hr;
7238
7239     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7240             device, width, height, debug_d3dformat(format_id), level);
7241     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7242             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7243     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7244
7245     if (surface_type == SURFACE_OPENGL && !device->adapter)
7246     {
7247         ERR("OpenGL surfaces are not available without OpenGL.\n");
7248         return WINED3DERR_NOTAVAILABLE;
7249     }
7250
7251     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7252     if (!object)
7253     {
7254         ERR("Failed to allocate surface memory.\n");
7255         return WINED3DERR_OUTOFVIDEOMEMORY;
7256     }
7257
7258     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7259             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7260     if (FAILED(hr))
7261     {
7262         WARN("Failed to initialize surface, returning %#x.\n", hr);
7263         HeapFree(GetProcessHeap(), 0, object);
7264         return hr;
7265     }
7266
7267     TRACE("Created surface %p.\n", object);
7268     *surface = object;
7269
7270     return hr;
7271 }