jscript: Use bytecode for '|=' expression.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
164 struct blt_info
165 {
166     GLenum binding;
167     GLenum bind_target;
168     enum tex_types tex_type;
169     GLfloat coords[4][3];
170 };
171
/* Rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, Filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380     UINT usage;
381     HDC dc;
382
383     TRACE("surface %p.\n", surface);
384
385     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
386     {
387         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
388         return WINED3DERR_INVALIDCALL;
389     }
390
391     switch (format->byte_count)
392     {
393         case 2:
394         case 4:
395             /* Allocate extra space to store the RGB bit masks. */
396             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
397             break;
398
399         case 3:
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
401             break;
402
403         default:
404             /* Allocate extra space for a palette. */
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
406                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
407             break;
408     }
409
410     if (!b_info)
411         return E_OUTOFMEMORY;
412
413     /* Some applications access the surface in via DWORDs, and do not take
414      * the necessary care at the end of the surface. So we need at least
415      * 4 extra bytes at the end of the surface. Check against the page size,
416      * if the last page used for the surface has at least 4 spare bytes we're
417      * safe, otherwise add an extra line to the DIB section. */
418     GetSystemInfo(&sysInfo);
419     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
420     {
421         extraline = 1;
422         TRACE("Adding an extra line to the DIB section.\n");
423     }
424
425     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
426     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
427     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
428     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
429     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
430             * wined3d_surface_get_pitch(surface);
431     b_info->bmiHeader.biPlanes = 1;
432     b_info->bmiHeader.biBitCount = format->byte_count * 8;
433
434     b_info->bmiHeader.biXPelsPerMeter = 0;
435     b_info->bmiHeader.biYPelsPerMeter = 0;
436     b_info->bmiHeader.biClrUsed = 0;
437     b_info->bmiHeader.biClrImportant = 0;
438
439     /* Get the bit masks */
440     masks = (DWORD *)b_info->bmiColors;
441     switch (surface->resource.format->id)
442     {
443         case WINED3DFMT_B8G8R8_UNORM:
444             usage = DIB_RGB_COLORS;
445             b_info->bmiHeader.biCompression = BI_RGB;
446             break;
447
448         case WINED3DFMT_B5G5R5X1_UNORM:
449         case WINED3DFMT_B5G5R5A1_UNORM:
450         case WINED3DFMT_B4G4R4A4_UNORM:
451         case WINED3DFMT_B4G4R4X4_UNORM:
452         case WINED3DFMT_B2G3R3_UNORM:
453         case WINED3DFMT_B2G3R3A8_UNORM:
454         case WINED3DFMT_R10G10B10A2_UNORM:
455         case WINED3DFMT_R8G8B8A8_UNORM:
456         case WINED3DFMT_R8G8B8X8_UNORM:
457         case WINED3DFMT_B10G10R10A2_UNORM:
458         case WINED3DFMT_B5G6R5_UNORM:
459         case WINED3DFMT_R16G16B16A16_UNORM:
460             usage = 0;
461             b_info->bmiHeader.biCompression = BI_BITFIELDS;
462             masks[0] = format->red_mask;
463             masks[1] = format->green_mask;
464             masks[2] = format->blue_mask;
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             usage = 0;
471             break;
472     }
473
474     if (!(dc = GetDC(0)))
475     {
476         HeapFree(GetProcessHeap(), 0, b_info);
477         return HRESULT_FROM_WIN32(GetLastError());
478     }
479
480     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
481             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
482             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
483     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
484     ReleaseDC(0, dc);
485
486     if (!surface->dib.DIBsection)
487     {
488         ERR("Failed to create DIB section.\n");
489         HeapFree(GetProcessHeap(), 0, b_info);
490         return HRESULT_FROM_WIN32(GetLastError());
491     }
492
493     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
494     /* Copy the existing surface to the dib section. */
495     if (surface->resource.allocatedMemory)
496     {
497         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
498                 surface->resource.height * wined3d_surface_get_pitch(surface));
499     }
500     else
501     {
502         /* This is to make maps read the GL texture although memory is allocated. */
503         surface->flags &= ~SFLAG_INSYSMEM;
504     }
505     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
506
507     HeapFree(GetProcessHeap(), 0, b_info);
508
509     /* Now allocate a DC. */
510     surface->hDC = CreateCompatibleDC(0);
511     SelectObject(surface->hDC, surface->dib.DIBsection);
512     TRACE("Using wined3d palette %p.\n", surface->palette);
513     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
514
515     surface->flags |= SFLAG_DIBSECTION;
516
517     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
518     surface->resource.heapMemory = NULL;
519
520     return WINED3D_OK;
521 }
522
523 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
524 {
525     if (surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
526         return FALSE;
527     if (!(surface->flags & SFLAG_DYNLOCK))
528         return FALSE;
529     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
530         return FALSE;
531     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
532         return FALSE;
533
534     return TRUE;
535 }
536
537 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
538 {
539     struct wined3d_context *context;
540     GLenum error;
541
542     context = context_acquire(surface->resource.device, NULL);
543     ENTER_GL();
544
545     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
546     error = glGetError();
547     if (!surface->pbo || error != GL_NO_ERROR)
548         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
549
550     TRACE("Binding PBO %u.\n", surface->pbo);
551
552     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
553     checkGLcall("glBindBufferARB");
554
555     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
556             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
557     checkGLcall("glBufferDataARB");
558
559     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
560     checkGLcall("glBindBufferARB");
561
562     /* We don't need the system memory anymore and we can't even use it for PBOs. */
563     if (!(surface->flags & SFLAG_CLIENT))
564     {
565         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
566         surface->resource.heapMemory = NULL;
567     }
568     surface->resource.allocatedMemory = NULL;
569     surface->flags |= SFLAG_PBO;
570     LEAVE_GL();
571     context_release(context);
572 }
573
574 static void surface_prepare_system_memory(struct wined3d_surface *surface)
575 {
576     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
577
578     TRACE("surface %p.\n", surface);
579
580     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
581         surface_load_pbo(surface, gl_info);
582     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
583     {
584         /* Whatever surface we have, make sure that there is memory allocated
585          * for the downloaded copy, or a PBO to map. */
586         if (!surface->resource.heapMemory)
587             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
588
589         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
590                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
591
592         if (surface->flags & SFLAG_INSYSMEM)
593             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
594     }
595 }
596
597 static void surface_evict_sysmem(struct wined3d_surface *surface)
598 {
599     if (surface->flags & SFLAG_DONOTFREE)
600         return;
601
602     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
603     surface->resource.allocatedMemory = NULL;
604     surface->resource.heapMemory = NULL;
605     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
606 }
607
608 /* Context activation is done by the caller. */
609 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
610         struct wined3d_context *context, BOOL srgb)
611 {
612     struct wined3d_device *device = surface->resource.device;
613     DWORD active_sampler;
614
615     /* We don't need a specific texture unit, but after binding the texture
616      * the current unit is dirty. Read the unit back instead of switching to
617      * 0, this avoids messing around with the state manager's GL states. The
618      * current texture unit should always be a valid one.
619      *
620      * To be more specific, this is tricky because we can implicitly be
621      * called from sampler() in state.c. This means we can't touch anything
622      * other than whatever happens to be the currently active texture, or we
623      * would risk marking already applied sampler states dirty again. */
624     active_sampler = device->rev_tex_unit_map[context->active_texture];
625
626     if (active_sampler != WINED3D_UNMAPPED_STAGE)
627         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
628     surface_bind(surface, context, srgb);
629 }
630
631 static void surface_force_reload(struct wined3d_surface *surface)
632 {
633     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
634 }
635
636 static void surface_release_client_storage(struct wined3d_surface *surface)
637 {
638     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
639
640     ENTER_GL();
641     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
642     if (surface->texture_name)
643     {
644         surface_bind_and_dirtify(surface, context, FALSE);
645         glTexImage2D(surface->texture_target, surface->texture_level,
646                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
647     }
648     if (surface->texture_name_srgb)
649     {
650         surface_bind_and_dirtify(surface, context, TRUE);
651         glTexImage2D(surface->texture_target, surface->texture_level,
652                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
653     }
654     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
655     LEAVE_GL();
656
657     context_release(context);
658
659     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
660     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
661     surface_force_reload(surface);
662 }
663
664 static HRESULT surface_private_setup(struct wined3d_surface *surface)
665 {
666     /* TODO: Check against the maximum texture sizes supported by the video card. */
667     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
668     unsigned int pow2Width, pow2Height;
669
670     TRACE("surface %p.\n", surface);
671
672     surface->texture_name = 0;
673     surface->texture_target = GL_TEXTURE_2D;
674
675     /* Non-power2 support */
676     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
677     {
678         pow2Width = surface->resource.width;
679         pow2Height = surface->resource.height;
680     }
681     else
682     {
683         /* Find the nearest pow2 match */
684         pow2Width = pow2Height = 1;
685         while (pow2Width < surface->resource.width)
686             pow2Width <<= 1;
687         while (pow2Height < surface->resource.height)
688             pow2Height <<= 1;
689     }
690     surface->pow2Width = pow2Width;
691     surface->pow2Height = pow2Height;
692
693     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
694     {
695         /* TODO: Add support for non power two compressed textures. */
696         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
697         {
698             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
699                   surface, surface->resource.width, surface->resource.height);
700             return WINED3DERR_NOTAVAILABLE;
701         }
702     }
703
704     if (pow2Width != surface->resource.width
705             || pow2Height != surface->resource.height)
706     {
707         surface->flags |= SFLAG_NONPOW2;
708     }
709
710     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
711             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
712     {
713         /* One of three options:
714          * 1: Do the same as we do with NPOT and scale the texture, (any
715          *    texture ops would require the texture to be scaled which is
716          *    potentially slow)
717          * 2: Set the texture to the maximum size (bad idea).
718          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
719          * 4: Create the surface, but allow it to be used only for DirectDraw
720          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
721          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
722          *    the render target. */
723         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
724         {
725             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
726             return WINED3DERR_NOTAVAILABLE;
727         }
728
729         /* We should never use this surface in combination with OpenGL! */
730         TRACE("Creating an oversized surface: %ux%u.\n",
731                 surface->pow2Width, surface->pow2Height);
732     }
733     else
734     {
735         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
736          * and EXT_PALETTED_TEXTURE is used in combination with texture
737          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
738          * EXT_PALETTED_TEXTURE doesn't work in combination with
739          * ARB_TEXTURE_RECTANGLE. */
740         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
741                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
742                 && gl_info->supported[EXT_PALETTED_TEXTURE]
743                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
744         {
745             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
746             surface->pow2Width = surface->resource.width;
747             surface->pow2Height = surface->resource.height;
748             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
749         }
750     }
751
752     switch (wined3d_settings.offscreen_rendering_mode)
753     {
754         case ORM_FBO:
755             surface->get_drawable_size = get_drawable_size_fbo;
756             break;
757
758         case ORM_BACKBUFFER:
759             surface->get_drawable_size = get_drawable_size_backbuffer;
760             break;
761
762         default:
763             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
764             return WINED3DERR_INVALIDCALL;
765     }
766
767     surface->flags |= SFLAG_INSYSMEM;
768
769     return WINED3D_OK;
770 }
771
772 static void surface_realize_palette(struct wined3d_surface *surface)
773 {
774     struct wined3d_palette *palette = surface->palette;
775
776     TRACE("surface %p.\n", surface);
777
778     if (!palette) return;
779
780     if (surface->resource.format->id == WINED3DFMT_P8_UINT
781             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
782     {
783         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
784         {
785             /* Make sure the texture is up to date. This call doesn't do
786              * anything if the texture is already up to date. */
787             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
788
789             /* We want to force a palette refresh, so mark the drawable as not being up to date */
790             if (!surface_is_offscreen(surface))
791                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
792         }
793         else
794         {
795             if (!(surface->flags & SFLAG_INSYSMEM))
796             {
797                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
798                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
799             }
800             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
801         }
802     }
803
804     if (surface->flags & SFLAG_DIBSECTION)
805     {
806         RGBQUAD col[256];
807         unsigned int i;
808
809         TRACE("Updating the DC's palette.\n");
810
811         for (i = 0; i < 256; ++i)
812         {
813             col[i].rgbRed   = palette->palents[i].peRed;
814             col[i].rgbGreen = palette->palents[i].peGreen;
815             col[i].rgbBlue  = palette->palents[i].peBlue;
816             col[i].rgbReserved = 0;
817         }
818         SetDIBColorTable(surface->hDC, 0, 256, col);
819     }
820
821     /* Propagate the changes to the drawable when we have a palette. */
822     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
823         surface_load_location(surface, surface->draw_binding, NULL);
824 }
825
826 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
827 {
828     HRESULT hr;
829
830     /* If there's no destination surface there is nothing to do. */
831     if (!surface->overlay_dest)
832         return WINED3D_OK;
833
834     /* Blt calls ModifyLocation on the dest surface, which in turn calls
835      * DrawOverlay to update the overlay. Prevent an endless recursion. */
836     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
837         return WINED3D_OK;
838
839     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
840     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
841             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
842     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
843
844     return hr;
845 }
846
847 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
848 {
849     struct wined3d_device *device = surface->resource.device;
850     const RECT *pass_rect = rect;
851
852     TRACE("surface %p, rect %s, flags %#x.\n",
853             surface, wine_dbgstr_rect(rect), flags);
854
855     if (flags & WINED3DLOCK_DISCARD)
856     {
857         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
858         surface_prepare_system_memory(surface);
859         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
860     }
861     else
862     {
863         /* surface_load_location() does not check if the rectangle specifies
864          * the full surface. Most callers don't need that, so do it here. */
865         if (rect && !rect->top && !rect->left
866                 && rect->right == surface->resource.width
867                 && rect->bottom == surface->resource.height)
868             pass_rect = NULL;
869         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
870     }
871
872     if (surface->flags & SFLAG_PBO)
873     {
874         const struct wined3d_gl_info *gl_info;
875         struct wined3d_context *context;
876
877         context = context_acquire(device, NULL);
878         gl_info = context->gl_info;
879
880         ENTER_GL();
881         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
882         checkGLcall("glBindBufferARB");
883
884         /* This shouldn't happen but could occur if some other function
885          * didn't handle the PBO properly. */
886         if (surface->resource.allocatedMemory)
887             ERR("The surface already has PBO memory allocated.\n");
888
889         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
890         checkGLcall("glMapBufferARB");
891
892         /* Make sure the PBO isn't set anymore in order not to break non-PBO
893          * calls. */
894         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
895         checkGLcall("glBindBufferARB");
896
897         LEAVE_GL();
898         context_release(context);
899     }
900
901     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
902     {
903         if (!rect)
904             surface_add_dirty_rect(surface, NULL);
905         else
906         {
907             WINED3DBOX b;
908
909             b.Left = rect->left;
910             b.Top = rect->top;
911             b.Right = rect->right;
912             b.Bottom = rect->bottom;
913             b.Front = 0;
914             b.Back = 1;
915             surface_add_dirty_rect(surface, &b);
916         }
917     }
918 }
919
920 static void surface_unmap(struct wined3d_surface *surface)
921 {
922     struct wined3d_device *device = surface->resource.device;
923     BOOL fullsurface;
924
925     TRACE("surface %p.\n", surface);
926
927     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
928
929     if (surface->flags & SFLAG_PBO)
930     {
931         const struct wined3d_gl_info *gl_info;
932         struct wined3d_context *context;
933
934         TRACE("Freeing PBO memory.\n");
935
936         context = context_acquire(device, NULL);
937         gl_info = context->gl_info;
938
939         ENTER_GL();
940         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
941         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
942         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
943         checkGLcall("glUnmapBufferARB");
944         LEAVE_GL();
945         context_release(context);
946
947         surface->resource.allocatedMemory = NULL;
948     }
949
950     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
951
952     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
953     {
954         TRACE("Not dirtified, nothing to do.\n");
955         goto done;
956     }
957
958     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
959             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
960     {
961         if (!surface->dirtyRect.left && !surface->dirtyRect.top
962                 && surface->dirtyRect.right == surface->resource.width
963                 && surface->dirtyRect.bottom == surface->resource.height)
964         {
965             fullsurface = TRUE;
966         }
967         else
968         {
969             /* TODO: Proper partial rectangle tracking. */
970             fullsurface = FALSE;
971             surface->flags |= SFLAG_INSYSMEM;
972         }
973
974         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
975
976         /* Partial rectangle tracking is not commonly implemented, it is only
977          * done for render targets. INSYSMEM was set before to tell
978          * surface_load_location() where to read the rectangle from.
979          * Indrawable is set because all modifications from the partial
980          * sysmem copy are written back to the drawable, thus the surface is
981          * merged again in the drawable. The sysmem copy is not fully up to
982          * date because only a subrectangle was read in Map(). */
983         if (!fullsurface)
984         {
985             surface_modify_location(surface, surface->draw_binding, TRUE);
986             surface_evict_sysmem(surface);
987         }
988
989         surface->dirtyRect.left = surface->resource.width;
990         surface->dirtyRect.top = surface->resource.height;
991         surface->dirtyRect.right = 0;
992         surface->dirtyRect.bottom = 0;
993     }
994     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
995     {
996         FIXME("Depth / stencil buffer locking is not implemented.\n");
997     }
998
999 done:
1000     /* Overlays have to be redrawn manually after changes with the GL implementation */
1001     if (surface->overlay_dest)
1002         surface_draw_overlay(surface);
1003 }
1004
1005 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1006 {
1007     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1008         return FALSE;
1009     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1010         return FALSE;
1011     return TRUE;
1012 }
1013
1014 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1015         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1016 {
1017     const struct wined3d_gl_info *gl_info;
1018     struct wined3d_context *context;
1019     DWORD src_mask, dst_mask;
1020     GLbitfield gl_mask;
1021
1022     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1023             device, src_surface, wine_dbgstr_rect(src_rect),
1024             dst_surface, wine_dbgstr_rect(dst_rect));
1025
1026     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1027     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1028
1029     if (src_mask != dst_mask)
1030     {
1031         ERR("Incompatible formats %s and %s.\n",
1032                 debug_d3dformat(src_surface->resource.format->id),
1033                 debug_d3dformat(dst_surface->resource.format->id));
1034         return;
1035     }
1036
1037     if (!src_mask)
1038     {
1039         ERR("Not a depth / stencil format: %s.\n",
1040                 debug_d3dformat(src_surface->resource.format->id));
1041         return;
1042     }
1043
1044     gl_mask = 0;
1045     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1046         gl_mask |= GL_DEPTH_BUFFER_BIT;
1047     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1048         gl_mask |= GL_STENCIL_BUFFER_BIT;
1049
1050     /* Make sure the locations are up-to-date. Loading the destination
1051      * surface isn't required if the entire surface is overwritten. */
1052     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1053     if (!surface_is_full_rect(dst_surface, dst_rect))
1054         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1055
1056     context = context_acquire(device, NULL);
1057     if (!context->valid)
1058     {
1059         context_release(context);
1060         WARN("Invalid context, skipping blit.\n");
1061         return;
1062     }
1063
1064     gl_info = context->gl_info;
1065
1066     ENTER_GL();
1067
1068     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1069     glReadBuffer(GL_NONE);
1070     checkGLcall("glReadBuffer()");
1071     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1072
1073     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1074     context_set_draw_buffer(context, GL_NONE);
1075     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1076
1077     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1078     {
1079         glDepthMask(GL_TRUE);
1080         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1081     }
1082     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1083     {
1084         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1085         {
1086             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1087             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1088         }
1089         glStencilMask(~0U);
1090         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1091     }
1092
1093     glDisable(GL_SCISSOR_TEST);
1094     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1095
1096     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1097             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1098     checkGLcall("glBlitFramebuffer()");
1099
1100     LEAVE_GL();
1101
1102     if (wined3d_settings.strict_draw_ordering)
1103         wglFlush(); /* Flush to ensure ordering across contexts. */
1104
1105     context_release(context);
1106 }
1107
1108 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1109  * Depth / stencil is not supported. */
1110 static void surface_blt_fbo(const struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1111         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1112         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1113 {
1114     const struct wined3d_gl_info *gl_info;
1115     struct wined3d_context *context;
1116     RECT src_rect, dst_rect;
1117     GLenum gl_filter;
1118     GLenum buffer;
1119
1120     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1121     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1122             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1123     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1124             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1125
1126     src_rect = *src_rect_in;
1127     dst_rect = *dst_rect_in;
1128
1129     switch (filter)
1130     {
1131         case WINED3DTEXF_LINEAR:
1132             gl_filter = GL_LINEAR;
1133             break;
1134
1135         default:
1136             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1137         case WINED3DTEXF_NONE:
1138         case WINED3DTEXF_POINT:
1139             gl_filter = GL_NEAREST;
1140             break;
1141     }
1142
1143     /* Resolve the source surface first if needed. */
1144     if (src_location == SFLAG_INRB_MULTISAMPLE
1145             && (src_surface->resource.format->id != dst_surface->resource.format->id
1146                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1147                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1148         src_location = SFLAG_INRB_RESOLVED;
1149
1150     /* Make sure the locations are up-to-date. Loading the destination
1151      * surface isn't required if the entire surface is overwritten. (And is
1152      * in fact harmful if we're being called by surface_load_location() with
1153      * the purpose of loading the destination surface.) */
1154     surface_load_location(src_surface, src_location, NULL);
1155     if (!surface_is_full_rect(dst_surface, &dst_rect))
1156         surface_load_location(dst_surface, dst_location, NULL);
1157
1158     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1159     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1160     else context = context_acquire(device, NULL);
1161
1162     if (!context->valid)
1163     {
1164         context_release(context);
1165         WARN("Invalid context, skipping blit.\n");
1166         return;
1167     }
1168
1169     gl_info = context->gl_info;
1170
1171     if (src_location == SFLAG_INDRAWABLE)
1172     {
1173         TRACE("Source surface %p is onscreen.\n", src_surface);
1174         buffer = surface_get_gl_buffer(src_surface);
1175         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1176     }
1177     else
1178     {
1179         TRACE("Source surface %p is offscreen.\n", src_surface);
1180         buffer = GL_COLOR_ATTACHMENT0;
1181     }
1182
1183     ENTER_GL();
1184     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1185     glReadBuffer(buffer);
1186     checkGLcall("glReadBuffer()");
1187     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1188     LEAVE_GL();
1189
1190     if (dst_location == SFLAG_INDRAWABLE)
1191     {
1192         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1193         buffer = surface_get_gl_buffer(dst_surface);
1194         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1195     }
1196     else
1197     {
1198         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1199         buffer = GL_COLOR_ATTACHMENT0;
1200     }
1201
1202     ENTER_GL();
1203     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1204     context_set_draw_buffer(context, buffer);
1205     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1206     context_invalidate_state(context, STATE_FRAMEBUFFER);
1207
1208     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1209     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1210     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1211     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1212     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1213
1214     glDisable(GL_SCISSOR_TEST);
1215     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1216
1217     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1218             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1219     checkGLcall("glBlitFramebuffer()");
1220
1221     LEAVE_GL();
1222
1223     if (wined3d_settings.strict_draw_ordering
1224             || (dst_location == SFLAG_INDRAWABLE
1225             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1226         wglFlush();
1227
1228     context_release(context);
1229 }
1230
1231 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1232         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1233         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1234 {
1235     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1236         return FALSE;
1237
1238     /* Source and/or destination need to be on the GL side */
1239     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1240         return FALSE;
1241
1242     switch (blit_op)
1243     {
1244         case WINED3D_BLIT_OP_COLOR_BLIT:
1245             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1246                 return FALSE;
1247             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1248                 return FALSE;
1249             break;
1250
1251         case WINED3D_BLIT_OP_DEPTH_BLIT:
1252             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1253                 return FALSE;
1254             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1255                 return FALSE;
1256             break;
1257
1258         default:
1259             return FALSE;
1260     }
1261
1262     if (!(src_format->id == dst_format->id
1263             || (is_identity_fixup(src_format->color_fixup)
1264             && is_identity_fixup(dst_format->color_fixup))))
1265         return FALSE;
1266
1267     return TRUE;
1268 }
1269
1270 /* This function checks if the primary render target uses the 8bit paletted format. */
1271 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1272 {
1273     if (device->fb.render_targets && device->fb.render_targets[0])
1274     {
1275         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1276         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1277                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1278             return TRUE;
1279     }
1280     return FALSE;
1281 }
1282
1283 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1284         DWORD color, struct wined3d_color *float_color)
1285 {
1286     const struct wined3d_format *format = surface->resource.format;
1287     const struct wined3d_device *device = surface->resource.device;
1288
1289     switch (format->id)
1290     {
1291         case WINED3DFMT_P8_UINT:
1292             if (surface->palette)
1293             {
1294                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1295                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1296                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1297             }
1298             else
1299             {
1300                 float_color->r = 0.0f;
1301                 float_color->g = 0.0f;
1302                 float_color->b = 0.0f;
1303             }
1304             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1305             break;
1306
1307         case WINED3DFMT_B5G6R5_UNORM:
1308             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1309             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1310             float_color->b = (color & 0x1f) / 31.0f;
1311             float_color->a = 1.0f;
1312             break;
1313
1314         case WINED3DFMT_B8G8R8_UNORM:
1315         case WINED3DFMT_B8G8R8X8_UNORM:
1316             float_color->r = D3DCOLOR_R(color);
1317             float_color->g = D3DCOLOR_G(color);
1318             float_color->b = D3DCOLOR_B(color);
1319             float_color->a = 1.0f;
1320             break;
1321
1322         case WINED3DFMT_B8G8R8A8_UNORM:
1323             float_color->r = D3DCOLOR_R(color);
1324             float_color->g = D3DCOLOR_G(color);
1325             float_color->b = D3DCOLOR_B(color);
1326             float_color->a = D3DCOLOR_A(color);
1327             break;
1328
1329         default:
1330             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1331             return FALSE;
1332     }
1333
1334     return TRUE;
1335 }
1336
1337 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1338 {
1339     const struct wined3d_format *format = surface->resource.format;
1340
1341     switch (format->id)
1342     {
1343         case WINED3DFMT_S1_UINT_D15_UNORM:
1344             *float_depth = depth / (float)0x00007fff;
1345             break;
1346
1347         case WINED3DFMT_D16_UNORM:
1348             *float_depth = depth / (float)0x0000ffff;
1349             break;
1350
1351         case WINED3DFMT_D24_UNORM_S8_UINT:
1352         case WINED3DFMT_X8D24_UNORM:
1353             *float_depth = depth / (float)0x00ffffff;
1354             break;
1355
1356         case WINED3DFMT_D32_UNORM:
1357             *float_depth = depth / (float)0xffffffff;
1358             break;
1359
1360         default:
1361             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1362             return FALSE;
1363     }
1364
1365     return TRUE;
1366 }
1367
1368 /* Do not call while under the GL lock. */
1369 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1370 {
1371     const struct wined3d_resource *resource = &surface->resource;
1372     struct wined3d_device *device = resource->device;
1373     const struct blit_shader *blitter;
1374
1375     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1376             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1377     if (!blitter)
1378     {
1379         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1380         return WINED3DERR_INVALIDCALL;
1381     }
1382
1383     return blitter->depth_fill(device, surface, rect, depth);
1384 }
1385
1386 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1387         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1388 {
1389     struct wined3d_device *device = src_surface->resource.device;
1390
1391     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1392             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1393             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1394         return WINED3DERR_INVALIDCALL;
1395
1396     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1397
1398     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1399             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1400     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1401
1402     return WINED3D_OK;
1403 }
1404
1405 /* Do not call while under the GL lock. */
1406 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1407         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1408         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1409 {
1410     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1411     struct wined3d_device *device = dst_surface->resource.device;
1412     DWORD src_ds_flags, dst_ds_flags;
1413     RECT src_rect, dst_rect;
1414     BOOL scale, convert;
1415
1416     static const DWORD simple_blit = WINEDDBLT_ASYNC
1417             | WINEDDBLT_COLORFILL
1418             | WINEDDBLT_WAIT
1419             | WINEDDBLT_DEPTHFILL
1420             | WINEDDBLT_DONOTWAIT;
1421
1422     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1423             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1424             flags, fx, debug_d3dtexturefiltertype(filter));
1425     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1426
1427     if (fx)
1428     {
1429         TRACE("dwSize %#x.\n", fx->dwSize);
1430         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1431         TRACE("dwROP %#x.\n", fx->dwROP);
1432         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1433         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1434         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1435         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1436         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1437         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1438         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1439         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1440         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1441         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1442         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1443         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1444         TRACE("dwReserved %#x.\n", fx->dwReserved);
1445         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1446         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1447         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1448         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1449         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1450         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1451                 fx->ddckDestColorkey.dwColorSpaceLowValue,
1452                 fx->ddckDestColorkey.dwColorSpaceHighValue);
1453         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1454                 fx->ddckSrcColorkey.dwColorSpaceLowValue,
1455                 fx->ddckSrcColorkey.dwColorSpaceHighValue);
1456     }
1457
1458     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1459     {
1460         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1461         return WINEDDERR_SURFACEBUSY;
1462     }
1463
1464     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1465
1466     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1467             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1468             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1469             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1470             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1471     {
1472         /* The destination rect can be out of bounds on the condition
1473          * that a clipper is set for the surface. */
1474         if (dst_surface->clipper)
1475             FIXME("Blit clipping not implemented.\n");
1476         else
1477             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1478         return WINEDDERR_INVALIDRECT;
1479     }
1480
1481     if (src_surface)
1482     {
1483         surface_get_rect(src_surface, src_rect_in, &src_rect);
1484
1485         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1486                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1487                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1488                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1489                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1490         {
1491             WARN("Application gave us bad source rectangle for Blt.\n");
1492             return WINEDDERR_INVALIDRECT;
1493         }
1494     }
1495     else
1496     {
1497         memset(&src_rect, 0, sizeof(src_rect));
1498     }
1499
1500     if (!fx || !(fx->dwDDFX))
1501         flags &= ~WINEDDBLT_DDFX;
1502
1503     if (flags & WINEDDBLT_WAIT)
1504         flags &= ~WINEDDBLT_WAIT;
1505
1506     if (flags & WINEDDBLT_ASYNC)
1507     {
1508         static unsigned int once;
1509
1510         if (!once++)
1511             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1512         flags &= ~WINEDDBLT_ASYNC;
1513     }
1514
1515     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1516     if (flags & WINEDDBLT_DONOTWAIT)
1517     {
1518         static unsigned int once;
1519
1520         if (!once++)
1521             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1522         flags &= ~WINEDDBLT_DONOTWAIT;
1523     }
1524
1525     if (!device->d3d_initialized)
1526     {
1527         WARN("D3D not initialized, using fallback.\n");
1528         goto cpu;
1529     }
1530
1531     /* We want to avoid invalidating the sysmem location for converted
1532      * surfaces, since otherwise we'd have to convert the data back when
1533      * locking them. */
1534     if (dst_surface->flags & SFLAG_CONVERTED)
1535     {
1536         WARN("Converted surface, using CPU blit.\n");
1537         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1538     }
1539
1540     if (flags & ~simple_blit)
1541     {
1542         WARN("Using fallback for complex blit (%#x).\n", flags);
1543         goto fallback;
1544     }
1545
1546     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1547         src_swapchain = src_surface->container.u.swapchain;
1548     else
1549         src_swapchain = NULL;
1550
1551     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1552         dst_swapchain = dst_surface->container.u.swapchain;
1553     else
1554         dst_swapchain = NULL;
1555
1556     /* This isn't strictly needed. FBO blits for example could deal with
1557      * cross-swapchain blits by first downloading the source to a texture
1558      * before switching to the destination context. We just have this here to
1559      * not have to deal with the issue, since cross-swapchain blits should be
1560      * rare. */
1561     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1562     {
1563         FIXME("Using fallback for cross-swapchain blit.\n");
1564         goto fallback;
1565     }
1566
1567     scale = src_surface
1568             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1569             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1570     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1571
1572     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1573     if (src_surface)
1574         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1575     else
1576         src_ds_flags = 0;
1577
1578     if (src_ds_flags || dst_ds_flags)
1579     {
1580         if (flags & WINEDDBLT_DEPTHFILL)
1581         {
1582             float depth;
1583
1584             TRACE("Depth fill.\n");
1585
1586             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1587                 return WINED3DERR_INVALIDCALL;
1588
1589             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1590                 return WINED3D_OK;
1591         }
1592         else
1593         {
1594             /* Accessing depth / stencil surfaces is supposed to fail while in
1595              * a scene, except for fills, which seem to work. */
1596             if (device->inScene)
1597             {
1598                 WARN("Rejecting depth / stencil access while in scene.\n");
1599                 return WINED3DERR_INVALIDCALL;
1600             }
1601
1602             if (src_ds_flags != dst_ds_flags)
1603             {
1604                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1605                 return WINED3DERR_INVALIDCALL;
1606             }
1607
1608             if (src_rect.top || src_rect.left
1609                     || src_rect.bottom != src_surface->resource.height
1610                     || src_rect.right != src_surface->resource.width)
1611             {
1612                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1613                         wine_dbgstr_rect(&src_rect));
1614                 return WINED3DERR_INVALIDCALL;
1615             }
1616
1617             if (dst_rect.top || dst_rect.left
1618                     || dst_rect.bottom != dst_surface->resource.height
1619                     || dst_rect.right != dst_surface->resource.width)
1620             {
1621                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1622                         wine_dbgstr_rect(&src_rect));
1623                 return WINED3DERR_INVALIDCALL;
1624             }
1625
1626             if (scale)
1627             {
1628                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1629                 return WINED3DERR_INVALIDCALL;
1630             }
1631
1632             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1633                 return WINED3D_OK;
1634         }
1635     }
1636     else
1637     {
1638         /* In principle this would apply to depth blits as well, but we don't
1639          * implement those in the CPU blitter at the moment. */
1640         if ((dst_surface->flags & SFLAG_INSYSMEM)
1641                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1642         {
1643             if (scale)
1644                 TRACE("Not doing sysmem blit because of scaling.\n");
1645             else if (convert)
1646                 TRACE("Not doing sysmem blit because of format conversion.\n");
1647             else
1648                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1649         }
1650
1651         if (flags & WINEDDBLT_COLORFILL)
1652         {
1653             struct wined3d_color color;
1654
1655             TRACE("Color fill.\n");
1656
1657             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1658                 goto fallback;
1659
1660             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1661                 return WINED3D_OK;
1662         }
1663         else
1664         {
1665             TRACE("Color blit.\n");
1666
1667             /* Upload */
1668             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1669             {
1670                 if (scale)
1671                     TRACE("Not doing upload because of scaling.\n");
1672                 else if (convert)
1673                     TRACE("Not doing upload because of format conversion.\n");
1674                 else
1675                 {
1676                     POINT dst_point = {dst_rect.left, dst_rect.top};
1677
1678                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1679                     {
1680                         if (!surface_is_offscreen(dst_surface))
1681                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1682                         return WINED3D_OK;
1683                     }
1684                 }
1685             }
1686
1687             /* Use present for back -> front blits. The idea behind this is
1688              * that present is potentially faster than a blit, in particular
1689              * when FBO blits aren't available. Some ddraw applications like
1690              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1691              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1692              * applications can't blit directly to the frontbuffer. */
1693             if (dst_swapchain && dst_swapchain->back_buffers
1694                     && dst_surface == dst_swapchain->front_buffer
1695                     && src_surface == dst_swapchain->back_buffers[0])
1696             {
1697                 WINED3DSWAPEFFECT swap_effect = dst_swapchain->desc.swap_effect;
1698
1699                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1700
1701                 /* Set the swap effect to COPY, we don't want the backbuffer
1702                  * to become undefined. */
1703                 dst_swapchain->desc.swap_effect = WINED3DSWAPEFFECT_COPY;
1704                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1705                 dst_swapchain->desc.swap_effect = swap_effect;
1706
1707                 return WINED3D_OK;
1708             }
1709
1710             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1711                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1712                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1713             {
1714                 TRACE("Using FBO blit.\n");
1715
1716                 surface_blt_fbo(device, filter,
1717                         src_surface, src_surface->draw_binding, &src_rect,
1718                         dst_surface, dst_surface->draw_binding, &dst_rect);
1719                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1720                 return WINED3D_OK;
1721             }
1722
1723             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1724                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1725                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1726             {
1727                 TRACE("Using arbfp blit.\n");
1728
1729                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1730                     return WINED3D_OK;
1731             }
1732         }
1733     }
1734
1735 fallback:
1736
1737     /* Special cases for render targets. */
1738     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1739             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1740     {
1741         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1742                 src_surface, &src_rect, flags, fx, filter)))
1743             return WINED3D_OK;
1744     }
1745
1746 cpu:
1747
1748     /* For the rest call the X11 surface implementation. For render targets
1749      * this should be implemented OpenGL accelerated in BltOverride, other
1750      * blits are rather rare. */
1751     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1752 }
1753
1754 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1755         struct wined3d_surface *render_target)
1756 {
1757     TRACE("surface %p, render_target %p.\n", surface, render_target);
1758
1759     /* TODO: Check surface sizes, pools, etc. */
1760
1761     if (render_target->resource.multisample_type)
1762         return WINED3DERR_INVALIDCALL;
1763
1764     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3DTEXF_POINT);
1765 }
1766
1767 /* Context activation is done by the caller. */
1768 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1769 {
1770     if (!surface->resource.heapMemory)
1771     {
1772         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1773         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1774                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1775     }
1776
1777     ENTER_GL();
1778     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1779     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1780     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1781             surface->resource.size, surface->resource.allocatedMemory));
1782     checkGLcall("glGetBufferSubDataARB");
1783     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1784     checkGLcall("glDeleteBuffersARB");
1785     LEAVE_GL();
1786
1787     surface->pbo = 0;
1788     surface->flags &= ~SFLAG_PBO;
1789 }
1790
1791 /* Do not call while under the GL lock. */
1792 static void surface_unload(struct wined3d_resource *resource)
1793 {
1794     struct wined3d_surface *surface = surface_from_resource(resource);
1795     struct wined3d_renderbuffer_entry *entry, *entry2;
1796     struct wined3d_device *device = resource->device;
1797     const struct wined3d_gl_info *gl_info;
1798     struct wined3d_context *context;
1799
1800     TRACE("surface %p.\n", surface);
1801
1802     if (resource->pool == WINED3DPOOL_DEFAULT)
1803     {
1804         /* Default pool resources are supposed to be destroyed before Reset is called.
1805          * Implicit resources stay however. So this means we have an implicit render target
1806          * or depth stencil. The content may be destroyed, but we still have to tear down
1807          * opengl resources, so we cannot leave early.
1808          *
1809          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1810          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1811          * or the depth stencil into an FBO the texture or render buffer will be removed
1812          * and all flags get lost
1813          */
1814         surface_init_sysmem(surface);
1815         /* We also get here when the ddraw swapchain is destroyed, for example
1816          * for a mode switch. In this case this surface won't necessarily be
1817          * an implicit surface. We have to mark it lost so that the
1818          * application can restore it after the mode switch. */
1819         surface->flags |= SFLAG_LOST;
1820     }
1821     else
1822     {
1823         /* Load the surface into system memory */
1824         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1825         surface_modify_location(surface, surface->draw_binding, FALSE);
1826     }
1827     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1828     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1829     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1830
1831     context = context_acquire(device, NULL);
1832     gl_info = context->gl_info;
1833
1834     /* Destroy PBOs, but load them into real sysmem before */
1835     if (surface->flags & SFLAG_PBO)
1836         surface_remove_pbo(surface, gl_info);
1837
1838     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1839      * all application-created targets the application has to release the surface
1840      * before calling _Reset
1841      */
1842     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1843     {
1844         ENTER_GL();
1845         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1846         LEAVE_GL();
1847         list_remove(&entry->entry);
1848         HeapFree(GetProcessHeap(), 0, entry);
1849     }
1850     list_init(&surface->renderbuffers);
1851     surface->current_renderbuffer = NULL;
1852
1853     ENTER_GL();
1854
1855     /* If we're in a texture, the texture name belongs to the texture.
1856      * Otherwise, destroy it. */
1857     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1858     {
1859         glDeleteTextures(1, &surface->texture_name);
1860         surface->texture_name = 0;
1861         glDeleteTextures(1, &surface->texture_name_srgb);
1862         surface->texture_name_srgb = 0;
1863     }
1864     if (surface->rb_multisample)
1865     {
1866         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1867         surface->rb_multisample = 0;
1868     }
1869     if (surface->rb_resolved)
1870     {
1871         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1872         surface->rb_resolved = 0;
1873     }
1874
1875     LEAVE_GL();
1876
1877     context_release(context);
1878
1879     resource_unload(resource);
1880 }
1881
1882 static const struct wined3d_resource_ops surface_resource_ops =
1883 {
1884     surface_unload,
1885 };
1886
1887 static const struct wined3d_surface_ops surface_ops =
1888 {
1889     surface_private_setup,
1890     surface_realize_palette,
1891     surface_map,
1892     surface_unmap,
1893 };
1894
1895 /*****************************************************************************
1896  * Initializes the GDI surface, aka creates the DIB section we render to
1897  * The DIB section creation is done by calling GetDC, which will create the
1898  * section and releasing the dc to allow the app to use it. The dib section
1899  * will stay until the surface is released
1900  *
1901  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1902  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1903  * avoid confusion in the shared surface code.
1904  *
1905  * Returns:
1906  *  WINED3D_OK on success
1907  *  The return values of called methods on failure
1908  *
1909  *****************************************************************************/
1910 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1911 {
1912     HRESULT hr;
1913
1914     TRACE("surface %p.\n", surface);
1915
1916     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1917     {
1918         ERR("Overlays not yet supported by GDI surfaces.\n");
1919         return WINED3DERR_INVALIDCALL;
1920     }
1921
1922     /* Sysmem textures have memory already allocated - release it,
1923      * this avoids an unnecessary memcpy. */
1924     hr = surface_create_dib_section(surface);
1925     if (SUCCEEDED(hr))
1926     {
1927         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1928         surface->resource.heapMemory = NULL;
1929         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1930     }
1931
1932     /* We don't mind the nonpow2 stuff in GDI. */
1933     surface->pow2Width = surface->resource.width;
1934     surface->pow2Height = surface->resource.height;
1935
1936     return WINED3D_OK;
1937 }
1938
1939 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1940 {
1941     struct wined3d_palette *palette = surface->palette;
1942
1943     TRACE("surface %p.\n", surface);
1944
1945     if (!palette) return;
1946
1947     if (surface->flags & SFLAG_DIBSECTION)
1948     {
1949         RGBQUAD col[256];
1950         unsigned int i;
1951
1952         TRACE("Updating the DC's palette.\n");
1953
1954         for (i = 0; i < 256; ++i)
1955         {
1956             col[i].rgbRed = palette->palents[i].peRed;
1957             col[i].rgbGreen = palette->palents[i].peGreen;
1958             col[i].rgbBlue = palette->palents[i].peBlue;
1959             col[i].rgbReserved = 0;
1960         }
1961         SetDIBColorTable(surface->hDC, 0, 256, col);
1962     }
1963
1964     /* Update the image because of the palette change. Some games like e.g.
1965      * Red Alert call SetEntries a lot to implement fading. */
1966     /* Tell the swapchain to update the screen. */
1967     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1968     {
1969         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1970         if (surface == swapchain->front_buffer)
1971         {
1972             x11_copy_to_screen(swapchain, NULL);
1973         }
1974     }
1975 }
1976
1977 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1978 {
1979     TRACE("surface %p, rect %s, flags %#x.\n",
1980             surface, wine_dbgstr_rect(rect), flags);
1981
1982     if (!surface->resource.allocatedMemory)
1983     {
1984         /* This happens on gdi surfaces if the application set a user pointer
1985          * and resets it. Recreate the DIB section. */
1986         surface_create_dib_section(surface);
1987         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1988     }
1989 }
1990
1991 static void gdi_surface_unmap(struct wined3d_surface *surface)
1992 {
1993     TRACE("surface %p.\n", surface);
1994
1995     /* Tell the swapchain to update the screen. */
1996     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1997     {
1998         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1999         if (surface == swapchain->front_buffer)
2000         {
2001             x11_copy_to_screen(swapchain, &surface->lockedRect);
2002         }
2003     }
2004
2005     memset(&surface->lockedRect, 0, sizeof(RECT));
2006 }
2007
2008 static const struct wined3d_surface_ops gdi_surface_ops =
2009 {
2010     gdi_surface_private_setup,
2011     gdi_surface_realize_palette,
2012     gdi_surface_map,
2013     gdi_surface_unmap,
2014 };
2015
2016 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2017 {
2018     GLuint *name;
2019     DWORD flag;
2020
2021     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2022
2023     if(srgb)
2024     {
2025         name = &surface->texture_name_srgb;
2026         flag = SFLAG_INSRGBTEX;
2027     }
2028     else
2029     {
2030         name = &surface->texture_name;
2031         flag = SFLAG_INTEXTURE;
2032     }
2033
2034     if (!*name && new_name)
2035     {
2036         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2037          * surface has no texture name yet. See if we can get rid of this. */
2038         if (surface->flags & flag)
2039             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2040         surface_modify_location(surface, flag, FALSE);
2041     }
2042
2043     *name = new_name;
2044     surface_force_reload(surface);
2045 }
2046
2047 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2048 {
2049     TRACE("surface %p, target %#x.\n", surface, target);
2050
2051     if (surface->texture_target != target)
2052     {
2053         if (target == GL_TEXTURE_RECTANGLE_ARB)
2054         {
2055             surface->flags &= ~SFLAG_NORMCOORD;
2056         }
2057         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2058         {
2059             surface->flags |= SFLAG_NORMCOORD;
2060         }
2061     }
2062     surface->texture_target = target;
2063     surface_force_reload(surface);
2064 }
2065
2066 /* Context activation is done by the caller. */
2067 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2068 {
2069     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2070
2071     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2072     {
2073         struct wined3d_texture *texture = surface->container.u.texture;
2074
2075         TRACE("Passing to container (%p).\n", texture);
2076         texture->texture_ops->texture_bind(texture, context, srgb);
2077     }
2078     else
2079     {
2080         if (surface->texture_level)
2081         {
2082             ERR("Standalone surface %p is non-zero texture level %u.\n",
2083                     surface, surface->texture_level);
2084         }
2085
2086         if (srgb)
2087             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2088
2089         ENTER_GL();
2090
2091         if (!surface->texture_name)
2092         {
2093             glGenTextures(1, &surface->texture_name);
2094             checkGLcall("glGenTextures");
2095
2096             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2097
2098             context_bind_texture(context, surface->texture_target, surface->texture_name);
2099             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2100             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2101             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2102             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2103             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2104             checkGLcall("glTexParameteri");
2105         }
2106         else
2107         {
2108             context_bind_texture(context, surface->texture_target, surface->texture_name);
2109         }
2110
2111         LEAVE_GL();
2112     }
2113 }
2114
2115 /* This call just downloads data, the caller is responsible for binding the
2116  * correct texture. */
2117 /* Context activation is done by the caller. */
2118 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2119 {
2120     const struct wined3d_format *format = surface->resource.format;
2121
2122     /* Only support read back of converted P8 surfaces. */
2123     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2124     {
2125         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2126         return;
2127     }
2128
2129     ENTER_GL();
2130
2131     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2132     {
2133         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2134                 surface, surface->texture_level, format->glFormat, format->glType,
2135                 surface->resource.allocatedMemory);
2136
2137         if (surface->flags & SFLAG_PBO)
2138         {
2139             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2140             checkGLcall("glBindBufferARB");
2141             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2142             checkGLcall("glGetCompressedTexImageARB");
2143             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2144             checkGLcall("glBindBufferARB");
2145         }
2146         else
2147         {
2148             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2149                     surface->texture_level, surface->resource.allocatedMemory));
2150             checkGLcall("glGetCompressedTexImageARB");
2151         }
2152
2153         LEAVE_GL();
2154     }
2155     else
2156     {
2157         void *mem;
2158         GLenum gl_format = format->glFormat;
2159         GLenum gl_type = format->glType;
2160         int src_pitch = 0;
2161         int dst_pitch = 0;
2162
2163         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2164         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2165         {
2166             gl_format = GL_ALPHA;
2167             gl_type = GL_UNSIGNED_BYTE;
2168         }
2169
2170         if (surface->flags & SFLAG_NONPOW2)
2171         {
2172             unsigned char alignment = surface->resource.device->surface_alignment;
2173             src_pitch = format->byte_count * surface->pow2Width;
2174             dst_pitch = wined3d_surface_get_pitch(surface);
2175             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2176             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2177         }
2178         else
2179         {
2180             mem = surface->resource.allocatedMemory;
2181         }
2182
2183         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2184                 surface, surface->texture_level, gl_format, gl_type, mem);
2185
2186         if (surface->flags & SFLAG_PBO)
2187         {
2188             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2189             checkGLcall("glBindBufferARB");
2190
2191             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2192             checkGLcall("glGetTexImage");
2193
2194             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2195             checkGLcall("glBindBufferARB");
2196         }
2197         else
2198         {
2199             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2200             checkGLcall("glGetTexImage");
2201         }
2202         LEAVE_GL();
2203
2204         if (surface->flags & SFLAG_NONPOW2)
2205         {
2206             const BYTE *src_data;
2207             BYTE *dst_data;
2208             UINT y;
2209             /*
2210              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2211              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2212              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2213              *
2214              * We're doing this...
2215              *
2216              * instead of boxing the texture :
2217              * |<-texture width ->|  -->pow2width|   /\
2218              * |111111111111111111|              |   |
2219              * |222 Texture 222222| boxed empty  | texture height
2220              * |3333 Data 33333333|              |   |
2221              * |444444444444444444|              |   \/
2222              * -----------------------------------   |
2223              * |     boxed  empty | boxed empty  | pow2height
2224              * |                  |              |   \/
2225              * -----------------------------------
2226              *
2227              *
2228              * we're repacking the data to the expected texture width
2229              *
2230              * |<-texture width ->|  -->pow2width|   /\
2231              * |111111111111111111222222222222222|   |
2232              * |222333333333333333333444444444444| texture height
2233              * |444444                           |   |
2234              * |                                 |   \/
2235              * |                                 |   |
2236              * |            empty                | pow2height
2237              * |                                 |   \/
2238              * -----------------------------------
2239              *
2240              * == is the same as
2241              *
2242              * |<-texture width ->|    /\
2243              * |111111111111111111|
2244              * |222222222222222222|texture height
2245              * |333333333333333333|
2246              * |444444444444444444|    \/
2247              * --------------------
2248              *
2249              * this also means that any references to allocatedMemory should work with the data as if were a
2250              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2251              *
2252              * internally the texture is still stored in a boxed format so any references to textureName will
2253              * get a boxed texture with width pow2width and not a texture of width resource.width.
2254              *
2255              * Performance should not be an issue, because applications normally do not lock the surfaces when
2256              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2257              * and doesn't have to be re-read. */
2258             src_data = mem;
2259             dst_data = surface->resource.allocatedMemory;
2260             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2261             for (y = 1; y < surface->resource.height; ++y)
2262             {
2263                 /* skip the first row */
2264                 src_data += src_pitch;
2265                 dst_data += dst_pitch;
2266                 memcpy(dst_data, src_data, dst_pitch);
2267             }
2268
2269             HeapFree(GetProcessHeap(), 0, mem);
2270         }
2271     }
2272
2273     /* Surface has now been downloaded */
2274     surface->flags |= SFLAG_INSYSMEM;
2275 }
2276
2277 /* This call just uploads data, the caller is responsible for binding the
2278  * correct texture. */
2279 /* Context activation is done by the caller. */
2280 static void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2281         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2282         BOOL srgb, const struct wined3d_bo_address *data)
2283 {
2284     UINT update_w = src_rect->right - src_rect->left;
2285     UINT update_h = src_rect->bottom - src_rect->top;
2286
2287     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2288             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2289             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2290
2291     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2292         update_h *= format->heightscale;
2293
2294     ENTER_GL();
2295
2296     if (data->buffer_object)
2297     {
2298         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2299         checkGLcall("glBindBufferARB");
2300     }
2301
2302     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2303     {
2304         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2305         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2306         const BYTE *addr = data->addr;
2307         GLenum internal;
2308
2309         addr += (src_rect->top / format->block_height) * src_pitch;
2310         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2311
2312         if (srgb)
2313             internal = format->glGammaInternal;
2314         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2315             internal = format->rtInternal;
2316         else
2317             internal = format->glInternal;
2318
2319         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2320                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2321                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2322
2323         if (row_length == src_pitch)
2324         {
2325             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2326                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2327         }
2328         else
2329         {
2330             UINT row, y;
2331
2332             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2333              * can't use the unpack row length like below. */
2334             for (row = 0, y = dst_point->y; row < row_count; ++row)
2335             {
2336                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2337                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2338                 y += format->block_height;
2339                 addr += src_pitch;
2340             }
2341         }
2342         checkGLcall("glCompressedTexSubImage2DARB");
2343     }
2344     else
2345     {
2346         const BYTE *addr = data->addr;
2347
2348         addr += src_rect->top * src_pitch;
2349         addr += src_rect->left * format->byte_count;
2350
2351         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2352                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2353                 update_w, update_h, format->glFormat, format->glType, addr);
2354
2355         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2356         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2357                 update_w, update_h, format->glFormat, format->glType, addr);
2358         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2359         checkGLcall("glTexSubImage2D");
2360     }
2361
2362     if (data->buffer_object)
2363     {
2364         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2365         checkGLcall("glBindBufferARB");
2366     }
2367
2368     LEAVE_GL();
2369
2370     if (wined3d_settings.strict_draw_ordering)
2371         wglFlush();
2372
2373     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2374     {
2375         struct wined3d_device *device = surface->resource.device;
2376         unsigned int i;
2377
2378         for (i = 0; i < device->context_count; ++i)
2379         {
2380             context_surface_update(device->contexts[i], surface);
2381         }
2382     }
2383 }
2384
2385 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2386         struct wined3d_surface *src_surface, const RECT *src_rect)
2387 {
2388     const struct wined3d_format *src_format;
2389     const struct wined3d_format *dst_format;
2390     const struct wined3d_gl_info *gl_info;
2391     struct wined3d_context *context;
2392     struct wined3d_bo_address data;
2393     struct wined3d_format format;
2394     UINT update_w, update_h;
2395     CONVERT_TYPES convert;
2396     UINT dst_w, dst_h;
2397     UINT src_w, src_h;
2398     UINT src_pitch;
2399     POINT p;
2400     RECT r;
2401
2402     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2403             dst_surface, wine_dbgstr_point(dst_point),
2404             src_surface, wine_dbgstr_rect(src_rect));
2405
2406     src_format = src_surface->resource.format;
2407     dst_format = dst_surface->resource.format;
2408
2409     if (src_format->id != dst_format->id)
2410     {
2411         WARN("Source and destination surfaces should have the same format.\n");
2412         return WINED3DERR_INVALIDCALL;
2413     }
2414
2415     if (!dst_point)
2416     {
2417         p.x = 0;
2418         p.y = 0;
2419         dst_point = &p;
2420     }
2421     else if (dst_point->x < 0 || dst_point->y < 0)
2422     {
2423         WARN("Invalid destination point.\n");
2424         return WINED3DERR_INVALIDCALL;
2425     }
2426
2427     if (!src_rect)
2428     {
2429         r.left = 0;
2430         r.top = 0;
2431         r.right = src_surface->resource.width;
2432         r.bottom = src_surface->resource.height;
2433         src_rect = &r;
2434     }
2435     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2436             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2437     {
2438         WARN("Invalid source rectangle.\n");
2439         return WINED3DERR_INVALIDCALL;
2440     }
2441
2442     src_w = src_surface->resource.width;
2443     src_h = src_surface->resource.height;
2444
2445     dst_w = dst_surface->resource.width;
2446     dst_h = dst_surface->resource.height;
2447
2448     update_w = src_rect->right - src_rect->left;
2449     update_h = src_rect->bottom - src_rect->top;
2450
2451     if (update_w > dst_w || dst_point->x > dst_w - update_w
2452             || update_h > dst_h || dst_point->y > dst_h - update_h)
2453     {
2454         WARN("Destination out of bounds.\n");
2455         return WINED3DERR_INVALIDCALL;
2456     }
2457
2458     /* NPOT block sizes would be silly. */
2459     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2460             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2461             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2462     {
2463         WARN("Update rect not block-aligned.\n");
2464         return WINED3DERR_INVALIDCALL;
2465     }
2466
2467     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2468     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2469     if (convert != NO_CONVERSION || format.convert)
2470     {
2471         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2472         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3DTEXF_POINT);
2473     }
2474
2475     context = context_acquire(dst_surface->resource.device, NULL);
2476     gl_info = context->gl_info;
2477
2478     /* Only load the surface for partial updates. For newly allocated texture
2479      * the texture wouldn't be the current location, and we'd upload zeroes
2480      * just to overwrite them again. */
2481     if (update_w == dst_w && update_h == dst_h)
2482         surface_prepare_texture(dst_surface, context, FALSE);
2483     else
2484         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2485     surface_bind(dst_surface, context, FALSE);
2486
2487     data.buffer_object = src_surface->pbo;
2488     data.addr = src_surface->resource.allocatedMemory;
2489     src_pitch = wined3d_surface_get_pitch(src_surface);
2490
2491     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2492
2493     invalidate_active_texture(dst_surface->resource.device, context);
2494
2495     context_release(context);
2496
2497     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2498     return WINED3D_OK;
2499 }
2500
2501 /* This call just allocates the texture, the caller is responsible for binding
2502  * the correct texture. */
2503 /* Context activation is done by the caller. */
2504 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2505         const struct wined3d_format *format, BOOL srgb)
2506 {
2507     BOOL enable_client_storage = FALSE;
2508     GLsizei width = surface->pow2Width;
2509     GLsizei height = surface->pow2Height;
2510     const BYTE *mem = NULL;
2511     GLenum internal;
2512
2513     if (srgb)
2514     {
2515         internal = format->glGammaInternal;
2516     }
2517     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2518     {
2519         internal = format->rtInternal;
2520     }
2521     else
2522     {
2523         internal = format->glInternal;
2524     }
2525
2526     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2527
2528     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2529             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2530             internal, width, height, format->glFormat, format->glType);
2531
2532     ENTER_GL();
2533
2534     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2535     {
2536         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2537                 || !surface->resource.allocatedMemory)
2538         {
2539             /* In some cases we want to disable client storage.
2540              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2541              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2542              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2543              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2544              */
2545             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2546             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2547             surface->flags &= ~SFLAG_CLIENT;
2548             enable_client_storage = TRUE;
2549         }
2550         else
2551         {
2552             surface->flags |= SFLAG_CLIENT;
2553
2554             /* Point OpenGL to our allocated texture memory. Do not use
2555              * resource.allocatedMemory here because it might point into a
2556              * PBO. Instead use heapMemory, but get the alignment right. */
2557             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2558                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2559         }
2560     }
2561
2562     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2563     {
2564         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2565                 internal, width, height, 0, surface->resource.size, mem));
2566         checkGLcall("glCompressedTexImage2DARB");
2567     }
2568     else
2569     {
2570         glTexImage2D(surface->texture_target, surface->texture_level,
2571                 internal, width, height, 0, format->glFormat, format->glType, mem);
2572         checkGLcall("glTexImage2D");
2573     }
2574
2575     if(enable_client_storage) {
2576         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2577         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2578     }
2579     LEAVE_GL();
2580 }
2581
2582 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2583  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2584 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2585 /* GL locking is done by the caller */
2586 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2587 {
2588     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2589     struct wined3d_renderbuffer_entry *entry;
2590     GLuint renderbuffer = 0;
2591     unsigned int src_width, src_height;
2592     unsigned int width, height;
2593
2594     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2595     {
2596         width = rt->pow2Width;
2597         height = rt->pow2Height;
2598     }
2599     else
2600     {
2601         width = surface->pow2Width;
2602         height = surface->pow2Height;
2603     }
2604
2605     src_width = surface->pow2Width;
2606     src_height = surface->pow2Height;
2607
2608     /* A depth stencil smaller than the render target is not valid */
2609     if (width > src_width || height > src_height) return;
2610
2611     /* Remove any renderbuffer set if the sizes match */
2612     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2613             || (width == src_width && height == src_height))
2614     {
2615         surface->current_renderbuffer = NULL;
2616         return;
2617     }
2618
2619     /* Look if we've already got a renderbuffer of the correct dimensions */
2620     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2621     {
2622         if (entry->width == width && entry->height == height)
2623         {
2624             renderbuffer = entry->id;
2625             surface->current_renderbuffer = entry;
2626             break;
2627         }
2628     }
2629
2630     if (!renderbuffer)
2631     {
2632         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2633         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2634         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2635                 surface->resource.format->glInternal, width, height);
2636
2637         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2638         entry->width = width;
2639         entry->height = height;
2640         entry->id = renderbuffer;
2641         list_add_head(&surface->renderbuffers, &entry->entry);
2642
2643         surface->current_renderbuffer = entry;
2644     }
2645
2646     checkGLcall("set_compatible_renderbuffer");
2647 }
2648
2649 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2650 {
2651     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2652
2653     TRACE("surface %p.\n", surface);
2654
2655     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2656     {
2657         ERR("Surface %p is not on a swapchain.\n", surface);
2658         return GL_NONE;
2659     }
2660
2661     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2662     {
2663         if (swapchain->render_to_fbo)
2664         {
2665             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2666             return GL_COLOR_ATTACHMENT0;
2667         }
2668         TRACE("Returning GL_BACK\n");
2669         return GL_BACK;
2670     }
2671     else if (surface == swapchain->front_buffer)
2672     {
2673         TRACE("Returning GL_FRONT\n");
2674         return GL_FRONT;
2675     }
2676
2677     FIXME("Higher back buffer, returning GL_BACK\n");
2678     return GL_BACK;
2679 }
2680
2681 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2682 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2683 {
2684     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2685
2686     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2687         /* No partial locking for textures yet. */
2688         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2689
2690     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2691     if (dirty_rect)
2692     {
2693         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2694         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2695         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2696         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2697     }
2698     else
2699     {
2700         surface->dirtyRect.left = 0;
2701         surface->dirtyRect.top = 0;
2702         surface->dirtyRect.right = surface->resource.width;
2703         surface->dirtyRect.bottom = surface->resource.height;
2704     }
2705
2706     /* if the container is a texture then mark it dirty. */
2707     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2708     {
2709         TRACE("Passing to container.\n");
2710         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2711     }
2712 }
2713
2714 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2715 {
2716     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2717     BOOL ck_changed;
2718
2719     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2720
2721     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2722     {
2723         ERR("Not supported on scratch surfaces.\n");
2724         return WINED3DERR_INVALIDCALL;
2725     }
2726
2727     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2728
2729     /* Reload if either the texture and sysmem have different ideas about the
2730      * color key, or the actual key values changed. */
2731     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2732             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2733             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2734     {
2735         TRACE("Reloading because of color keying\n");
2736         /* To perform the color key conversion we need a sysmem copy of
2737          * the surface. Make sure we have it. */
2738
2739         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2740         /* Make sure the texture is reloaded because of the color key change,
2741          * this kills performance though :( */
2742         /* TODO: This is not necessarily needed with hw palettized texture support. */
2743         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2744         /* Switching color keying on / off may change the internal format. */
2745         if (ck_changed)
2746             surface_force_reload(surface);
2747     }
2748     else if (!(surface->flags & flag))
2749     {
2750         TRACE("Reloading because surface is dirty.\n");
2751     }
2752     else
2753     {
2754         TRACE("surface is already in texture\n");
2755         return WINED3D_OK;
2756     }
2757
2758     /* No partial locking for textures yet. */
2759     surface_load_location(surface, flag, NULL);
2760     surface_evict_sysmem(surface);
2761
2762     return WINED3D_OK;
2763 }
2764
/* Convert a 32 bit float to a 16 bit half float (1 sign bit, 5 exponent bits
 * with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) gives 0x0000, NaN gives 0x7c01, infinities and values
 * too large for a half float give +/-INF (0x7c00 / 0xfc00). Values too small
 * for a normalized half float are returned as denormals, or as zero if they
 * are smaller still. The mantissa is rounded to nearest, away from zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale tmp into [2^10, 2^11), so that its integer part is the 11 bit
     * mantissa (including the implicit leading one), and keep track of the
     * power of two that was taken out. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may overflow the mantissa to 2^11. Carry that into the
     * exponent, otherwise masking with 0x3ff below would silently drop the
     * carry and halve the result. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2827
2828 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2829 {
2830     ULONG refcount;
2831
2832     TRACE("Surface %p, container %p of type %#x.\n",
2833             surface, surface->container.u.base, surface->container.type);
2834
2835     switch (surface->container.type)
2836     {
2837         case WINED3D_CONTAINER_TEXTURE:
2838             return wined3d_texture_incref(surface->container.u.texture);
2839
2840         case WINED3D_CONTAINER_SWAPCHAIN:
2841             return wined3d_swapchain_incref(surface->container.u.swapchain);
2842
2843         default:
2844             ERR("Unhandled container type %#x.\n", surface->container.type);
2845         case WINED3D_CONTAINER_NONE:
2846             break;
2847     }
2848
2849     refcount = InterlockedIncrement(&surface->resource.ref);
2850     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2851
2852     return refcount;
2853 }
2854
2855 /* Do not call while under the GL lock. */
2856 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2857 {
2858     ULONG refcount;
2859
2860     TRACE("Surface %p, container %p of type %#x.\n",
2861             surface, surface->container.u.base, surface->container.type);
2862
2863     switch (surface->container.type)
2864     {
2865         case WINED3D_CONTAINER_TEXTURE:
2866             return wined3d_texture_decref(surface->container.u.texture);
2867
2868         case WINED3D_CONTAINER_SWAPCHAIN:
2869             return wined3d_swapchain_decref(surface->container.u.swapchain);
2870
2871         default:
2872             ERR("Unhandled container type %#x.\n", surface->container.type);
2873         case WINED3D_CONTAINER_NONE:
2874             break;
2875     }
2876
2877     refcount = InterlockedDecrement(&surface->resource.ref);
2878     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2879
2880     if (!refcount)
2881     {
2882         surface_cleanup(surface);
2883         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2884
2885         TRACE("Destroyed surface %p.\n", surface);
2886         HeapFree(GetProcessHeap(), 0, surface);
2887     }
2888
2889     return refcount;
2890 }
2891
2892 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2893 {
2894     return resource_set_priority(&surface->resource, priority);
2895 }
2896
2897 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2898 {
2899     return resource_get_priority(&surface->resource);
2900 }
2901
2902 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2903 {
2904     TRACE("surface %p.\n", surface);
2905
2906     if (!surface->resource.device->d3d_initialized)
2907     {
2908         ERR("D3D not initialized.\n");
2909         return;
2910     }
2911
2912     surface_internal_preload(surface, SRGB_ANY);
2913 }
2914
2915 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2916 {
2917     TRACE("surface %p.\n", surface);
2918
2919     return surface->resource.parent;
2920 }
2921
2922 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2923 {
2924     TRACE("surface %p.\n", surface);
2925
2926     return &surface->resource;
2927 }
2928
2929 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2930 {
2931     TRACE("surface %p, flags %#x.\n", surface, flags);
2932
2933     switch (flags)
2934     {
2935         case WINEDDGBS_CANBLT:
2936         case WINEDDGBS_ISBLTDONE:
2937             return WINED3D_OK;
2938
2939         default:
2940             return WINED3DERR_INVALIDCALL;
2941     }
2942 }
2943
2944 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2945 {
2946     TRACE("surface %p, flags %#x.\n", surface, flags);
2947
2948     /* XXX: DDERR_INVALIDSURFACETYPE */
2949
2950     switch (flags)
2951     {
2952         case WINEDDGFS_CANFLIP:
2953         case WINEDDGFS_ISFLIPDONE:
2954             return WINED3D_OK;
2955
2956         default:
2957             return WINED3DERR_INVALIDCALL;
2958     }
2959 }
2960
2961 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2962 {
2963     TRACE("surface %p.\n", surface);
2964
2965     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2966     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2967 }
2968
2969 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2970 {
2971     TRACE("surface %p.\n", surface);
2972
2973     surface->flags &= ~SFLAG_LOST;
2974     return WINED3D_OK;
2975 }
2976
2977 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2978 {
2979     TRACE("surface %p, palette %p.\n", surface, palette);
2980
2981     if (surface->palette == palette)
2982     {
2983         TRACE("Nop palette change.\n");
2984         return WINED3D_OK;
2985     }
2986
2987     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2988         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2989
2990     surface->palette = palette;
2991
2992     if (palette)
2993     {
2994         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2995             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2996
2997         surface->surface_ops->surface_realize_palette(surface);
2998     }
2999
3000     return WINED3D_OK;
3001 }
3002
3003 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3004         DWORD flags, const WINEDDCOLORKEY *color_key)
3005 {
3006     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3007
3008     if (flags & WINEDDCKEY_COLORSPACE)
3009     {
3010         FIXME(" colorkey value not supported (%08x) !\n", flags);
3011         return WINED3DERR_INVALIDCALL;
3012     }
3013
3014     /* Dirtify the surface, but only if a key was changed. */
3015     if (color_key)
3016     {
3017         switch (flags & ~WINEDDCKEY_COLORSPACE)
3018         {
3019             case WINEDDCKEY_DESTBLT:
3020                 surface->DestBltCKey = *color_key;
3021                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3022                 break;
3023
3024             case WINEDDCKEY_DESTOVERLAY:
3025                 surface->DestOverlayCKey = *color_key;
3026                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3027                 break;
3028
3029             case WINEDDCKEY_SRCOVERLAY:
3030                 surface->SrcOverlayCKey = *color_key;
3031                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3032                 break;
3033
3034             case WINEDDCKEY_SRCBLT:
3035                 surface->SrcBltCKey = *color_key;
3036                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3037                 break;
3038         }
3039     }
3040     else
3041     {
3042         switch (flags & ~WINEDDCKEY_COLORSPACE)
3043         {
3044             case WINEDDCKEY_DESTBLT:
3045                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3046                 break;
3047
3048             case WINEDDCKEY_DESTOVERLAY:
3049                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3050                 break;
3051
3052             case WINEDDCKEY_SRCOVERLAY:
3053                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3054                 break;
3055
3056             case WINEDDCKEY_SRCBLT:
3057                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3058                 break;
3059         }
3060     }
3061
3062     return WINED3D_OK;
3063 }
3064
3065 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3066 {
3067     TRACE("surface %p.\n", surface);
3068
3069     return surface->palette;
3070 }
3071
3072 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3073 {
3074     const struct wined3d_format *format = surface->resource.format;
3075     DWORD pitch;
3076
3077     TRACE("surface %p.\n", surface);
3078
3079     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3080     {
3081         /* Since compressed formats are block based, pitch means the amount of
3082          * bytes to the next row of block rather than the next row of pixels. */
3083         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3084         pitch = row_block_count * format->block_byte_count;
3085     }
3086     else
3087     {
3088         unsigned char alignment = surface->resource.device->surface_alignment;
3089         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3090         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3091     }
3092
3093     TRACE("Returning %u.\n", pitch);
3094
3095     return pitch;
3096 }
3097
3098 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3099 {
3100     TRACE("surface %p, mem %p.\n", surface, mem);
3101
3102     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3103     {
3104         WARN("Surface is locked or the DC is in use.\n");
3105         return WINED3DERR_INVALIDCALL;
3106     }
3107
3108     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3109     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3110     {
3111         ERR("Not supported on render targets.\n");
3112         return WINED3DERR_INVALIDCALL;
3113     }
3114
3115     if (mem && mem != surface->resource.allocatedMemory)
3116     {
3117         void *release = NULL;
3118
3119         /* Do I have to copy the old surface content? */
3120         if (surface->flags & SFLAG_DIBSECTION)
3121         {
3122             DeleteDC(surface->hDC);
3123             DeleteObject(surface->dib.DIBsection);
3124             surface->dib.bitmap_data = NULL;
3125             surface->resource.allocatedMemory = NULL;
3126             surface->hDC = NULL;
3127             surface->flags &= ~SFLAG_DIBSECTION;
3128         }
3129         else if (!(surface->flags & SFLAG_USERPTR))
3130         {
3131             release = surface->resource.heapMemory;
3132             surface->resource.heapMemory = NULL;
3133         }
3134         surface->resource.allocatedMemory = mem;
3135         surface->flags |= SFLAG_USERPTR;
3136
3137         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3138         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3139
3140         /* For client textures OpenGL has to be notified. */
3141         if (surface->flags & SFLAG_CLIENT)
3142             surface_release_client_storage(surface);
3143
3144         /* Now free the old memory if any. */
3145         HeapFree(GetProcessHeap(), 0, release);
3146     }
3147     else if (surface->flags & SFLAG_USERPTR)
3148     {
3149         /* HeapMemory should be NULL already. */
3150         if (surface->resource.heapMemory)
3151             ERR("User pointer surface has heap memory allocated.\n");
3152
3153         if (!mem)
3154         {
3155             surface->resource.allocatedMemory = NULL;
3156             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3157
3158             if (surface->flags & SFLAG_CLIENT)
3159                 surface_release_client_storage(surface);
3160
3161             surface_prepare_system_memory(surface);
3162         }
3163
3164         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3165     }
3166
3167     return WINED3D_OK;
3168 }
3169
3170 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3171 {
3172     LONG w, h;
3173
3174     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3175
3176     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3177     {
3178         WARN("Not an overlay surface.\n");
3179         return WINEDDERR_NOTAOVERLAYSURFACE;
3180     }
3181
3182     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3183     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3184     surface->overlay_destrect.left = x;
3185     surface->overlay_destrect.top = y;
3186     surface->overlay_destrect.right = x + w;
3187     surface->overlay_destrect.bottom = y + h;
3188
3189     surface_draw_overlay(surface);
3190
3191     return WINED3D_OK;
3192 }
3193
3194 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3195 {
3196     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3197
3198     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3199     {
3200         TRACE("Not an overlay surface.\n");
3201         return WINEDDERR_NOTAOVERLAYSURFACE;
3202     }
3203
3204     if (!surface->overlay_dest)
3205     {
3206         TRACE("Overlay not visible.\n");
3207         *x = 0;
3208         *y = 0;
3209         return WINEDDERR_OVERLAYNOTVISIBLE;
3210     }
3211
3212     *x = surface->overlay_destrect.left;
3213     *y = surface->overlay_destrect.top;
3214
3215     TRACE("Returning position %d, %d.\n", *x, *y);
3216
3217     return WINED3D_OK;
3218 }
3219
3220 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3221         DWORD flags, struct wined3d_surface *ref)
3222 {
3223     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3224
3225     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3226     {
3227         TRACE("Not an overlay surface.\n");
3228         return WINEDDERR_NOTAOVERLAYSURFACE;
3229     }
3230
3231     return WINED3D_OK;
3232 }
3233
3234 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3235         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3236 {
3237     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3238             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3239
3240     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3241     {
3242         WARN("Not an overlay surface.\n");
3243         return WINEDDERR_NOTAOVERLAYSURFACE;
3244     }
3245     else if (!dst_surface)
3246     {
3247         WARN("Dest surface is NULL.\n");
3248         return WINED3DERR_INVALIDCALL;
3249     }
3250
3251     if (src_rect)
3252     {
3253         surface->overlay_srcrect = *src_rect;
3254     }
3255     else
3256     {
3257         surface->overlay_srcrect.left = 0;
3258         surface->overlay_srcrect.top = 0;
3259         surface->overlay_srcrect.right = surface->resource.width;
3260         surface->overlay_srcrect.bottom = surface->resource.height;
3261     }
3262
3263     if (dst_rect)
3264     {
3265         surface->overlay_destrect = *dst_rect;
3266     }
3267     else
3268     {
3269         surface->overlay_destrect.left = 0;
3270         surface->overlay_destrect.top = 0;
3271         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3272         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3273     }
3274
3275     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3276     {
3277         surface->overlay_dest = NULL;
3278         list_remove(&surface->overlay_entry);
3279     }
3280
3281     if (flags & WINEDDOVER_SHOW)
3282     {
3283         if (surface->overlay_dest != dst_surface)
3284         {
3285             surface->overlay_dest = dst_surface;
3286             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3287         }
3288     }
3289     else if (flags & WINEDDOVER_HIDE)
3290     {
3291         /* tests show that the rectangles are erased on hide */
3292         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3293         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3294         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3295         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3296         surface->overlay_dest = NULL;
3297     }
3298
3299     surface_draw_overlay(surface);
3300
3301     return WINED3D_OK;
3302 }
3303
3304 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3305 {
3306     TRACE("surface %p, clipper %p.\n", surface, clipper);
3307
3308     surface->clipper = clipper;
3309
3310     return WINED3D_OK;
3311 }
3312
3313 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3314 {
3315     TRACE("surface %p.\n", surface);
3316
3317     return surface->clipper;
3318 }
3319
3320 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3321 {
3322     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3323
3324     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3325
3326     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3327     {
3328         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3329         return WINED3DERR_INVALIDCALL;
3330     }
3331
3332     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3333             surface->pow2Width, surface->pow2Height);
3334     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3335     surface->resource.format = format;
3336
3337     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3338     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3339             format->glFormat, format->glInternal, format->glType);
3340
3341     return WINED3D_OK;
3342 }
3343
3344 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3345         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3346 {
3347     unsigned short *dst_s;
3348     const float *src_f;
3349     unsigned int x, y;
3350
3351     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3352
3353     for (y = 0; y < h; ++y)
3354     {
3355         src_f = (const float *)(src + y * pitch_in);
3356         dst_s = (unsigned short *) (dst + y * pitch_out);
3357         for (x = 0; x < w; ++x)
3358         {
3359             dst_s[x] = float_32_to_16(src_f + x);
3360         }
3361     }
3362 }
3363
3364 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3365         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3366 {
3367     static const unsigned char convert_5to8[] =
3368     {
3369         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3370         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3371         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3372         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3373     };
3374     static const unsigned char convert_6to8[] =
3375     {
3376         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3377         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3378         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3379         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3380         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3381         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3382         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3383         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3384     };
3385     unsigned int x, y;
3386
3387     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3388
3389     for (y = 0; y < h; ++y)
3390     {
3391         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3392         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3393         for (x = 0; x < w; ++x)
3394         {
3395             WORD pixel = src_line[x];
3396             dst_line[x] = 0xff000000
3397                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3398                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3399                     | convert_5to8[(pixel & 0x001f)];
3400         }
3401     }
3402 }
3403
3404 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3405  * in both cases we're just setting the X / Alpha channel to 0xff. */
3406 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3407         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3408 {
3409     unsigned int x, y;
3410
3411     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3412
3413     for (y = 0; y < h; ++y)
3414     {
3415         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3416         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3417
3418         for (x = 0; x < w; ++x)
3419         {
3420             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3421         }
3422     }
3423 }
3424
3425 static inline BYTE cliptobyte(int x)
3426 {
3427     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3428 }
3429
3430 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3431         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3432 {
3433     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3434     unsigned int x, y;
3435
3436     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3437
3438     for (y = 0; y < h; ++y)
3439     {
3440         const BYTE *src_line = src + y * pitch_in;
3441         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3442         for (x = 0; x < w; ++x)
3443         {
3444             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3445              *     C = Y - 16; D = U - 128; E = V - 128;
3446              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3447              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3448              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3449              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3450              * U and V are shared between the pixels. */
3451             if (!(x & 1)) /* For every even pixel, read new U and V. */
3452             {
3453                 d = (int) src_line[1] - 128;
3454                 e = (int) src_line[3] - 128;
3455                 r2 = 409 * e + 128;
3456                 g2 = - 100 * d - 208 * e + 128;
3457                 b2 = 516 * d + 128;
3458             }
3459             c2 = 298 * ((int) src_line[0] - 16);
3460             dst_line[x] = 0xff000000
3461                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3462                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3463                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3464                 /* Scale RGB values to 0..255 range,
3465                  * then clip them if still not in range (may be negative),
3466                  * then shift them within DWORD if necessary. */
3467             src_line += 2;
3468         }
3469     }
3470 }
3471
3472 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3473         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3474 {
3475     unsigned int x, y;
3476     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3477
3478     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3479
3480     for (y = 0; y < h; ++y)
3481     {
3482         const BYTE *src_line = src + y * pitch_in;
3483         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3484         for (x = 0; x < w; ++x)
3485         {
3486             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3487              *     C = Y - 16; D = U - 128; E = V - 128;
3488              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3489              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3490              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3491              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3492              * U and V are shared between the pixels. */
3493             if (!(x & 1)) /* For every even pixel, read new U and V. */
3494             {
3495                 d = (int) src_line[1] - 128;
3496                 e = (int) src_line[3] - 128;
3497                 r2 = 409 * e + 128;
3498                 g2 = - 100 * d - 208 * e + 128;
3499                 b2 = 516 * d + 128;
3500             }
3501             c2 = 298 * ((int) src_line[0] - 16);
3502             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3503                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3504                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3505                 /* Scale RGB values to 0..255 range,
3506                  * then clip them if still not in range (may be negative),
3507                  * then shift them within DWORD if necessary. */
3508             src_line += 2;
3509         }
3510     }
3511 }
3512
3513 struct d3dfmt_convertor_desc
3514 {
3515     enum wined3d_format_id from, to;
3516     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3517 };
3518
3519 static const struct d3dfmt_convertor_desc convertors[] =
3520 {
3521     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3522     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3523     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3524     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3525     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3526     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3527 };
3528
3529 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3530         enum wined3d_format_id to)
3531 {
3532     unsigned int i;
3533
3534     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3535     {
3536         if (convertors[i].from == from && convertors[i].to == to)
3537             return &convertors[i];
3538     }
3539
3540     return NULL;
3541 }
3542
3543 /*****************************************************************************
3544  * surface_convert_format
3545  *
3546  * Creates a duplicate of a surface in a different format. Is used by Blt to
3547  * blit between surfaces with different formats.
3548  *
3549  * Parameters
3550  *  source: Source surface
3551  *  fmt: Requested destination format
3552  *
3553  *****************************************************************************/
3554 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3555 {
3556     struct wined3d_mapped_rect src_map, dst_map;
3557     const struct d3dfmt_convertor_desc *conv;
3558     struct wined3d_surface *ret = NULL;
3559     HRESULT hr;
3560
3561     conv = find_convertor(source->resource.format->id, to_fmt);
3562     if (!conv)
3563     {
3564         FIXME("Cannot find a conversion function from format %s to %s.\n",
3565                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3566         return NULL;
3567     }
3568
3569     wined3d_surface_create(source->resource.device, source->resource.width,
3570             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3DPOOL_SCRATCH,
3571             WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3572             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3573             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3574     if (!ret)
3575     {
3576         ERR("Failed to create a destination surface for conversion.\n");
3577         return NULL;
3578     }
3579
3580     memset(&src_map, 0, sizeof(src_map));
3581     memset(&dst_map, 0, sizeof(dst_map));
3582
3583     hr = wined3d_surface_map(source, &src_map, NULL, WINED3DLOCK_READONLY);
3584     if (FAILED(hr))
3585     {
3586         ERR("Failed to lock the source surface.\n");
3587         wined3d_surface_decref(ret);
3588         return NULL;
3589     }
3590     hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3DLOCK_READONLY);
3591     if (FAILED(hr))
3592     {
3593         ERR("Failed to lock the destination surface.\n");
3594         wined3d_surface_unmap(source);
3595         wined3d_surface_decref(ret);
3596         return NULL;
3597     }
3598
3599     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3600             source->resource.width, source->resource.height);
3601
3602     wined3d_surface_unmap(ret);
3603     wined3d_surface_unmap(source);
3604
3605     return ret;
3606 }
3607
3608 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3609         unsigned int bpp, UINT pitch, DWORD color)
3610 {
3611     BYTE *first;
3612     int x, y;
3613
3614     /* Do first row */
3615
3616 #define COLORFILL_ROW(type) \
3617 do { \
3618     type *d = (type *)buf; \
3619     for (x = 0; x < width; ++x) \
3620         d[x] = (type)color; \
3621 } while(0)
3622
3623     switch (bpp)
3624     {
3625         case 1:
3626             COLORFILL_ROW(BYTE);
3627             break;
3628
3629         case 2:
3630             COLORFILL_ROW(WORD);
3631             break;
3632
3633         case 3:
3634         {
3635             BYTE *d = buf;
3636             for (x = 0; x < width; ++x, d += 3)
3637             {
3638                 d[0] = (color      ) & 0xFF;
3639                 d[1] = (color >>  8) & 0xFF;
3640                 d[2] = (color >> 16) & 0xFF;
3641             }
3642             break;
3643         }
3644         case 4:
3645             COLORFILL_ROW(DWORD);
3646             break;
3647
3648         default:
3649             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3650             return WINED3DERR_NOTAVAILABLE;
3651     }
3652
3653 #undef COLORFILL_ROW
3654
3655     /* Now copy first row. */
3656     first = buf;
3657     for (y = 1; y < height; ++y)
3658     {
3659         buf += pitch;
3660         memcpy(buf, first, width * bpp);
3661     }
3662
3663     return WINED3D_OK;
3664 }
3665
3666 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3667 {
3668     TRACE("surface %p.\n", surface);
3669
3670     if (!(surface->flags & SFLAG_LOCKED))
3671     {
3672         WARN("Trying to unmap unmapped surface.\n");
3673         return WINEDDERR_NOTLOCKED;
3674     }
3675     surface->flags &= ~SFLAG_LOCKED;
3676
3677     surface->surface_ops->surface_unmap(surface);
3678
3679     return WINED3D_OK;
3680 }
3681
3682 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3683         struct wined3d_mapped_rect *mapped_rect, const RECT *rect, DWORD flags)
3684 {
3685     const struct wined3d_format *format = surface->resource.format;
3686
3687     TRACE("surface %p, mapped_rect %p, rect %s, flags %#x.\n",
3688             surface, mapped_rect, wine_dbgstr_rect(rect), flags);
3689
3690     if (surface->flags & SFLAG_LOCKED)
3691     {
3692         WARN("Surface is already mapped.\n");
3693         return WINED3DERR_INVALIDCALL;
3694     }
3695     if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
3696             && rect && (rect->left || rect->top
3697             || rect->right != surface->resource.width
3698             || rect->bottom != surface->resource.height))
3699     {
3700         UINT width_mask = format->block_width - 1;
3701         UINT height_mask = format->block_height - 1;
3702
3703         if ((rect->left & width_mask) || (rect->right & width_mask)
3704                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3705         {
3706             WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3707                     wine_dbgstr_rect(rect), format->block_width, format->block_height);
3708
3709             if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3710                 return WINED3DERR_INVALIDCALL;
3711         }
3712     }
3713
3714     surface->flags |= SFLAG_LOCKED;
3715
3716     if (!(surface->flags & SFLAG_LOCKABLE))
3717         WARN("Trying to lock unlockable surface.\n");
3718
3719     /* Performance optimization: Count how often a surface is mapped, if it is
3720      * mapped regularly do not throw away the system memory copy. This avoids
3721      * the need to download the surface from OpenGL all the time. The surface
3722      * is still downloaded if the OpenGL texture is changed. */
3723     if (!(surface->flags & SFLAG_DYNLOCK))
3724     {
3725         if (++surface->lockCount > MAXLOCKCOUNT)
3726         {
3727             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3728             surface->flags |= SFLAG_DYNLOCK;
3729         }
3730     }
3731
3732     surface->surface_ops->surface_map(surface, rect, flags);
3733
3734     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3735         mapped_rect->row_pitch = surface->resource.width * format->byte_count;
3736     else
3737         mapped_rect->row_pitch = wined3d_surface_get_pitch(surface);
3738
3739     if (!rect)
3740     {
3741         mapped_rect->data = surface->resource.allocatedMemory;
3742         surface->lockedRect.left = 0;
3743         surface->lockedRect.top = 0;
3744         surface->lockedRect.right = surface->resource.width;
3745         surface->lockedRect.bottom = surface->resource.height;
3746     }
3747     else
3748     {
3749         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3750         {
3751             /* Compressed textures are block based, so calculate the offset of
3752              * the block that contains the top-left pixel of the locked rectangle. */
3753             mapped_rect->data = surface->resource.allocatedMemory
3754                     + ((rect->top / format->block_height) * mapped_rect->row_pitch)
3755                     + ((rect->left / format->block_width) * format->block_byte_count);
3756         }
3757         else
3758         {
3759             mapped_rect->data = surface->resource.allocatedMemory
3760                     + (mapped_rect->row_pitch * rect->top)
3761                     + (rect->left * format->byte_count);
3762         }
3763         surface->lockedRect.left = rect->left;
3764         surface->lockedRect.top = rect->top;
3765         surface->lockedRect.right = rect->right;
3766         surface->lockedRect.bottom = rect->bottom;
3767     }
3768
3769     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3770     TRACE("Returning memory %p, pitch %u.\n", mapped_rect->data, mapped_rect->row_pitch);
3771
3772     return WINED3D_OK;
3773 }
3774
3775 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3776 {
3777     struct wined3d_mapped_rect map;
3778     HRESULT hr;
3779
3780     TRACE("surface %p, dc %p.\n", surface, dc);
3781
3782     if (surface->flags & SFLAG_USERPTR)
3783     {
3784         ERR("Not supported on surfaces with application-provided memory.\n");
3785         return WINEDDERR_NODC;
3786     }
3787
3788     /* Give more detailed info for ddraw. */
3789     if (surface->flags & SFLAG_DCINUSE)
3790         return WINEDDERR_DCALREADYCREATED;
3791
3792     /* Can't GetDC if the surface is locked. */
3793     if (surface->flags & SFLAG_LOCKED)
3794         return WINED3DERR_INVALIDCALL;
3795
3796     /* Create a DIB section if there isn't a dc yet. */
3797     if (!surface->hDC)
3798     {
3799         if (surface->flags & SFLAG_CLIENT)
3800         {
3801             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3802             surface_release_client_storage(surface);
3803         }
3804         hr = surface_create_dib_section(surface);
3805         if (FAILED(hr))
3806             return WINED3DERR_INVALIDCALL;
3807
3808         /* Use the DIB section from now on if we are not using a PBO. */
3809         if (!(surface->flags & SFLAG_PBO))
3810             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3811     }
3812
3813     /* Map the surface. */
3814     hr = wined3d_surface_map(surface, &map, NULL, 0);
3815     if (FAILED(hr))
3816     {
3817         ERR("Map failed, hr %#x.\n", hr);
3818         return hr;
3819     }
3820
3821     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3822      * activates the allocatedMemory. */
3823     if (surface->flags & SFLAG_PBO)
3824         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3825
3826     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3827             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3828     {
3829         /* GetDC on palettized formats is unsupported in D3D9, and the method
3830          * is missing in D3D8, so this should only be used for DX <=7
3831          * surfaces (with non-device palettes). */
3832         const PALETTEENTRY *pal = NULL;
3833
3834         if (surface->palette)
3835         {
3836             pal = surface->palette->palents;
3837         }
3838         else
3839         {
3840             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3841             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3842
3843             if (dds_primary && dds_primary->palette)
3844                 pal = dds_primary->palette->palents;
3845         }
3846
3847         if (pal)
3848         {
3849             RGBQUAD col[256];
3850             unsigned int i;
3851
3852             for (i = 0; i < 256; ++i)
3853             {
3854                 col[i].rgbRed = pal[i].peRed;
3855                 col[i].rgbGreen = pal[i].peGreen;
3856                 col[i].rgbBlue = pal[i].peBlue;
3857                 col[i].rgbReserved = 0;
3858             }
3859             SetDIBColorTable(surface->hDC, 0, 256, col);
3860         }
3861     }
3862
3863     surface->flags |= SFLAG_DCINUSE;
3864
3865     *dc = surface->hDC;
3866     TRACE("Returning dc %p.\n", *dc);
3867
3868     return WINED3D_OK;
3869 }
3870
3871 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3872 {
3873     TRACE("surface %p, dc %p.\n", surface, dc);
3874
3875     if (!(surface->flags & SFLAG_DCINUSE))
3876         return WINEDDERR_NODC;
3877
3878     if (surface->hDC != dc)
3879     {
3880         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3881                 dc, surface->hDC);
3882         return WINEDDERR_NODC;
3883     }
3884
3885     /* Copy the contents of the DIB over to the PBO. */
3886     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3887         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3888
3889     /* We locked first, so unlock now. */
3890     wined3d_surface_unmap(surface);
3891
3892     surface->flags &= ~SFLAG_DCINUSE;
3893
3894     return WINED3D_OK;
3895 }
3896
3897 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3898 {
3899     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3900
3901     if (flags)
3902     {
3903         static UINT once;
3904         if (!once++)
3905             FIXME("Ignoring flags %#x.\n", flags);
3906         else
3907             WARN("Ignoring flags %#x.\n", flags);
3908     }
3909
3910     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3911     {
3912         ERR("Not supported on swapchain surfaces.\n");
3913         return WINEDDERR_NOTFLIPPABLE;
3914     }
3915
3916     /* Flipping is only supported on render targets and overlays. */
3917     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3918     {
3919         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3920         return WINEDDERR_NOTFLIPPABLE;
3921     }
3922
3923     flip_surface(surface, override);
3924
3925     /* Update overlays if they're visible. */
3926     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3927         return surface_draw_overlay(surface);
3928
3929     return WINED3D_OK;
3930 }
3931
3932 /* Do not call while under the GL lock. */
3933 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3934 {
3935     struct wined3d_device *device = surface->resource.device;
3936
3937     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3938
3939     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3940     {
3941         struct wined3d_texture *texture = surface->container.u.texture;
3942
3943         TRACE("Passing to container (%p).\n", texture);
3944         texture->texture_ops->texture_preload(texture, srgb);
3945     }
3946     else
3947     {
3948         struct wined3d_context *context;
3949
3950         TRACE("(%p) : About to load surface\n", surface);
3951
3952         /* TODO: Use already acquired context when possible. */
3953         context = context_acquire(device, NULL);
3954
3955         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3956
3957         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3958         {
3959             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3960             GLclampf tmp;
3961             tmp = 0.9f;
3962             ENTER_GL();
3963             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3964             LEAVE_GL();
3965         }
3966
3967         context_release(context);
3968     }
3969 }
3970
3971 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3972 {
3973     if (!surface->resource.allocatedMemory)
3974     {
3975         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3976                 surface->resource.size + RESOURCE_ALIGNMENT);
3977         if (!surface->resource.heapMemory)
3978         {
3979             ERR("Out of memory\n");
3980             return FALSE;
3981         }
3982         surface->resource.allocatedMemory =
3983             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3984     }
3985     else
3986     {
3987         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3988     }
3989
3990     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3991
3992     return TRUE;
3993 }
3994
3995 /* Read the framebuffer back into the surface */
3996 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3997 {
3998     struct wined3d_device *device = surface->resource.device;
3999     const struct wined3d_gl_info *gl_info;
4000     struct wined3d_context *context;
4001     BYTE *mem;
4002     GLint fmt;
4003     GLint type;
4004     BYTE *row, *top, *bottom;
4005     int i;
4006     BOOL bpp;
4007     RECT local_rect;
4008     BOOL srcIsUpsideDown;
4009     GLint rowLen = 0;
4010     GLint skipPix = 0;
4011     GLint skipRow = 0;
4012
4013     context = context_acquire(device, surface);
4014     context_apply_blit_state(context, device);
4015     gl_info = context->gl_info;
4016
4017     ENTER_GL();
4018
4019     /* Select the correct read buffer, and give some debug output.
4020      * There is no need to keep track of the current read buffer or reset it, every part of the code
4021      * that reads sets the read buffer as desired.
4022      */
4023     if (surface_is_offscreen(surface))
4024     {
4025         /* Mapping the primary render target which is not on a swapchain.
4026          * Read from the back buffer. */
4027         TRACE("Mapping offscreen render target.\n");
4028         glReadBuffer(device->offscreenBuffer);
4029         srcIsUpsideDown = TRUE;
4030     }
4031     else
4032     {
4033         /* Onscreen surfaces are always part of a swapchain */
4034         GLenum buffer = surface_get_gl_buffer(surface);
4035         TRACE("Mapping %#x buffer.\n", buffer);
4036         glReadBuffer(buffer);
4037         checkGLcall("glReadBuffer");
4038         srcIsUpsideDown = FALSE;
4039     }
4040
4041     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4042     if (!rect)
4043     {
4044         local_rect.left = 0;
4045         local_rect.top = 0;
4046         local_rect.right = surface->resource.width;
4047         local_rect.bottom = surface->resource.height;
4048     }
4049     else
4050     {
4051         local_rect = *rect;
4052     }
4053     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4054
4055     switch (surface->resource.format->id)
4056     {
4057         case WINED3DFMT_P8_UINT:
4058         {
4059             if (primary_render_target_is_p8(device))
4060             {
4061                 /* In case of P8 render targets the index is stored in the alpha component */
4062                 fmt = GL_ALPHA;
4063                 type = GL_UNSIGNED_BYTE;
4064                 mem = dest;
4065                 bpp = surface->resource.format->byte_count;
4066             }
4067             else
4068             {
4069                 /* GL can't return palettized data, so read ARGB pixels into a
4070                  * separate block of memory and convert them into palettized format
4071                  * in software. Slow, but if the app means to use palettized render
4072                  * targets and locks it...
4073                  *
4074                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4075                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4076                  * for the color channels when palettizing the colors.
4077                  */
4078                 fmt = GL_RGB;
4079                 type = GL_UNSIGNED_BYTE;
4080                 pitch *= 3;
4081                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4082                 if (!mem)
4083                 {
4084                     ERR("Out of memory\n");
4085                     LEAVE_GL();
4086                     return;
4087                 }
4088                 bpp = surface->resource.format->byte_count * 3;
4089             }
4090         }
4091         break;
4092
4093         default:
4094             mem = dest;
4095             fmt = surface->resource.format->glFormat;
4096             type = surface->resource.format->glType;
4097             bpp = surface->resource.format->byte_count;
4098     }
4099
4100     if (surface->flags & SFLAG_PBO)
4101     {
4102         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4103         checkGLcall("glBindBufferARB");
4104         if (mem)
4105         {
4106             ERR("mem not null for pbo -- unexpected\n");
4107             mem = NULL;
4108         }
4109     }
4110
4111     /* Save old pixel store pack state */
4112     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4113     checkGLcall("glGetIntegerv");
4114     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4115     checkGLcall("glGetIntegerv");
4116     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4117     checkGLcall("glGetIntegerv");
4118
4119     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4120     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4121     checkGLcall("glPixelStorei");
4122     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4123     checkGLcall("glPixelStorei");
4124     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4125     checkGLcall("glPixelStorei");
4126
4127     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4128             local_rect.right - local_rect.left,
4129             local_rect.bottom - local_rect.top,
4130             fmt, type, mem);
4131     checkGLcall("glReadPixels");
4132
4133     /* Reset previous pixel store pack state */
4134     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4135     checkGLcall("glPixelStorei");
4136     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4137     checkGLcall("glPixelStorei");
4138     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4139     checkGLcall("glPixelStorei");
4140
4141     if (surface->flags & SFLAG_PBO)
4142     {
4143         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4144         checkGLcall("glBindBufferARB");
4145
4146         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4147          * to get a pointer to it and perform the flipping in software. This is a lot
4148          * faster than calling glReadPixels for each line. In case we want more speed
4149          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4150         if (!srcIsUpsideDown)
4151         {
4152             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4153             checkGLcall("glBindBufferARB");
4154
4155             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4156             checkGLcall("glMapBufferARB");
4157         }
4158     }
4159
4160     /* TODO: Merge this with the palettization loop below for P8 targets */
4161     if(!srcIsUpsideDown) {
4162         UINT len, off;
4163         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4164             Flip the lines in software */
4165         len = (local_rect.right - local_rect.left) * bpp;
4166         off = local_rect.left * bpp;
4167
4168         row = HeapAlloc(GetProcessHeap(), 0, len);
4169         if(!row) {
4170             ERR("Out of memory\n");
4171             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4172                 HeapFree(GetProcessHeap(), 0, mem);
4173             LEAVE_GL();
4174             return;
4175         }
4176
4177         top = mem + pitch * local_rect.top;
4178         bottom = mem + pitch * (local_rect.bottom - 1);
4179         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4180             memcpy(row, top + off, len);
4181             memcpy(top + off, bottom + off, len);
4182             memcpy(bottom + off, row, len);
4183             top += pitch;
4184             bottom -= pitch;
4185         }
4186         HeapFree(GetProcessHeap(), 0, row);
4187
4188         /* Unmap the temp PBO buffer */
4189         if (surface->flags & SFLAG_PBO)
4190         {
4191             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4192             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4193         }
4194     }
4195
4196     LEAVE_GL();
4197     context_release(context);
4198
4199     /* For P8 textures we need to perform an inverse palette lookup. This is
4200      * done by searching for a palette index which matches the RGB value.
4201      * Note this isn't guaranteed to work when there are multiple entries for
4202      * the same color but we have no choice. In case of P8 render targets,
4203      * the index is stored in the alpha component so no conversion is needed. */
4204     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4205     {
4206         const PALETTEENTRY *pal = NULL;
4207         DWORD width = pitch / 3;
4208         int x, y, c;
4209
4210         if (surface->palette)
4211         {
4212             pal = surface->palette->palents;
4213         }
4214         else
4215         {
4216             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4217             HeapFree(GetProcessHeap(), 0, mem);
4218             return;
4219         }
4220
4221         for(y = local_rect.top; y < local_rect.bottom; y++) {
4222             for(x = local_rect.left; x < local_rect.right; x++) {
4223                 /*                      start              lines            pixels      */
4224                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4225                 const BYTE *green = blue  + 1;
4226                 const BYTE *red = green + 1;
4227
4228                 for(c = 0; c < 256; c++) {
4229                     if(*red   == pal[c].peRed   &&
4230                        *green == pal[c].peGreen &&
4231                        *blue  == pal[c].peBlue)
4232                     {
4233                         *((BYTE *) dest + y * width + x) = c;
4234                         break;
4235                     }
4236                 }
4237             }
4238         }
4239         HeapFree(GetProcessHeap(), 0, mem);
4240     }
4241 }
4242
4243 /* Read the framebuffer contents into a texture. Note that this function
4244  * doesn't do any kind of flipping. Using this on an onscreen surface will
4245  * result in a flipped D3D texture. */
4246 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4247 {
4248     struct wined3d_device *device = surface->resource.device;
4249     struct wined3d_context *context;
4250
4251     context = context_acquire(device, surface);
4252     device_invalidate_state(device, STATE_FRAMEBUFFER);
4253
4254     surface_prepare_texture(surface, context, srgb);
4255     surface_bind_and_dirtify(surface, context, srgb);
4256
4257     TRACE("Reading back offscreen render target %p.\n", surface);
4258
4259     ENTER_GL();
4260
4261     if (surface_is_offscreen(surface))
4262         glReadBuffer(device->offscreenBuffer);
4263     else
4264         glReadBuffer(surface_get_gl_buffer(surface));
4265     checkGLcall("glReadBuffer");
4266
4267     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4268             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4269     checkGLcall("glCopyTexSubImage2D");
4270
4271     LEAVE_GL();
4272
4273     context_release(context);
4274 }
4275
4276 /* Context activation is done by the caller. */
4277 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4278         struct wined3d_context *context, BOOL srgb)
4279 {
4280     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4281     CONVERT_TYPES convert;
4282     struct wined3d_format format;
4283
4284     if (surface->flags & alloc_flag) return;
4285
4286     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4287     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4288     else surface->flags &= ~SFLAG_CONVERTED;
4289
4290     surface_bind_and_dirtify(surface, context, srgb);
4291     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4292     surface->flags |= alloc_flag;
4293 }
4294
4295 /* Context activation is done by the caller. */
4296 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4297 {
4298     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4299     {
4300         struct wined3d_texture *texture = surface->container.u.texture;
4301         UINT sub_count = texture->level_count * texture->layer_count;
4302         UINT i;
4303
4304         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4305
4306         for (i = 0; i < sub_count; ++i)
4307         {
4308             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4309             surface_prepare_texture_internal(s, context, srgb);
4310         }
4311
4312         return;
4313     }
4314
4315     surface_prepare_texture_internal(surface, context, srgb);
4316 }
4317
4318 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4319 {
4320     if (multisample)
4321     {
4322         if (surface->rb_multisample)
4323             return;
4324
4325         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4326         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4327         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4328                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4329         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4330     }
4331     else
4332     {
4333         if (surface->rb_resolved)
4334             return;
4335
4336         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4337         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4338         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4339                 surface->pow2Width, surface->pow2Height);
4340         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4341     }
4342 }
4343
4344 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4345         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4346 {
4347     struct wined3d_device *device = surface->resource.device;
4348     UINT pitch = wined3d_surface_get_pitch(surface);
4349     const struct wined3d_gl_info *gl_info;
4350     struct wined3d_context *context;
4351     RECT local_rect;
4352     UINT w, h;
4353
4354     surface_get_rect(surface, rect, &local_rect);
4355
4356     mem += local_rect.top * pitch + local_rect.left * bpp;
4357     w = local_rect.right - local_rect.left;
4358     h = local_rect.bottom - local_rect.top;
4359
4360     /* Activate the correct context for the render target */
4361     context = context_acquire(device, surface);
4362     context_apply_blit_state(context, device);
4363     gl_info = context->gl_info;
4364
4365     ENTER_GL();
4366
4367     if (!surface_is_offscreen(surface))
4368     {
4369         GLenum buffer = surface_get_gl_buffer(surface);
4370         TRACE("Unlocking %#x buffer.\n", buffer);
4371         context_set_draw_buffer(context, buffer);
4372
4373         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4374         glPixelZoom(1.0f, -1.0f);
4375     }
4376     else
4377     {
4378         /* Primary offscreen render target */
4379         TRACE("Offscreen render target.\n");
4380         context_set_draw_buffer(context, device->offscreenBuffer);
4381
4382         glPixelZoom(1.0f, 1.0f);
4383     }
4384
4385     glRasterPos3i(local_rect.left, local_rect.top, 1);
4386     checkGLcall("glRasterPos3i");
4387
4388     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4389     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4390
4391     if (surface->flags & SFLAG_PBO)
4392     {
4393         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4394         checkGLcall("glBindBufferARB");
4395     }
4396
4397     glDrawPixels(w, h, fmt, type, mem);
4398     checkGLcall("glDrawPixels");
4399
4400     if (surface->flags & SFLAG_PBO)
4401     {
4402         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4403         checkGLcall("glBindBufferARB");
4404     }
4405
4406     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4407     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4408
4409     LEAVE_GL();
4410
4411     if (wined3d_settings.strict_draw_ordering
4412             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4413             && surface->container.u.swapchain->front_buffer == surface))
4414         wglFlush();
4415
4416     context_release(context);
4417 }
4418
4419 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4420         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4421 {
4422     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4423     const struct wined3d_device *device = surface->resource.device;
4424     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4425     BOOL blit_supported = FALSE;
4426
4427     /* Copy the default values from the surface. Below we might perform fixups */
4428     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4429     *format = *surface->resource.format;
4430     *convert = NO_CONVERSION;
4431
4432     /* Ok, now look if we have to do any conversion */
4433     switch (surface->resource.format->id)
4434     {
4435         case WINED3DFMT_P8_UINT:
4436             /* Below the call to blit_supported is disabled for Wine 1.2
4437              * because the function isn't operating correctly yet. At the
4438              * moment 8-bit blits are handled in software and if certain GL
4439              * extensions are around, surface conversion is performed at
4440              * upload time. The blit_supported call recognizes it as a
4441              * destination fixup. This type of upload 'fixup' and 8-bit to
4442              * 8-bit blits need to be handled by the blit_shader.
4443              * TODO: get rid of this #if 0. */
4444 #if 0
4445             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4446                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4447                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4448 #endif
4449             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4450
4451             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4452              * texturing. Further also use conversion in case of color keying.
4453              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4454              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4455              * conflicts with this.
4456              */
4457             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4458                     || colorkey_active || !use_texturing)
4459             {
4460                 format->glFormat = GL_RGBA;
4461                 format->glInternal = GL_RGBA;
4462                 format->glType = GL_UNSIGNED_BYTE;
4463                 format->conv_byte_count = 4;
4464                 if (colorkey_active)
4465                     *convert = CONVERT_PALETTED_CK;
4466                 else
4467                     *convert = CONVERT_PALETTED;
4468             }
4469             break;
4470
4471         case WINED3DFMT_B2G3R3_UNORM:
4472             /* **********************
4473                 GL_UNSIGNED_BYTE_3_3_2
4474                 ********************** */
4475             if (colorkey_active) {
4476                 /* This texture format will never be used.. So do not care about color keying
4477                     up until the point in time it will be needed :-) */
4478                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4479             }
4480             break;
4481
4482         case WINED3DFMT_B5G6R5_UNORM:
4483             if (colorkey_active)
4484             {
4485                 *convert = CONVERT_CK_565;
4486                 format->glFormat = GL_RGBA;
4487                 format->glInternal = GL_RGB5_A1;
4488                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4489                 format->conv_byte_count = 2;
4490             }
4491             break;
4492
4493         case WINED3DFMT_B5G5R5X1_UNORM:
4494             if (colorkey_active)
4495             {
4496                 *convert = CONVERT_CK_5551;
4497                 format->glFormat = GL_BGRA;
4498                 format->glInternal = GL_RGB5_A1;
4499                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4500                 format->conv_byte_count = 2;
4501             }
4502             break;
4503
4504         case WINED3DFMT_B8G8R8_UNORM:
4505             if (colorkey_active)
4506             {
4507                 *convert = CONVERT_CK_RGB24;
4508                 format->glFormat = GL_RGBA;
4509                 format->glInternal = GL_RGBA8;
4510                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4511                 format->conv_byte_count = 4;
4512             }
4513             break;
4514
4515         case WINED3DFMT_B8G8R8X8_UNORM:
4516             if (colorkey_active)
4517             {
4518                 *convert = CONVERT_RGB32_888;
4519                 format->glFormat = GL_RGBA;
4520                 format->glInternal = GL_RGBA8;
4521                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4522                 format->conv_byte_count = 4;
4523             }
4524             break;
4525
4526         default:
4527             break;
4528     }
4529
4530     return WINED3D_OK;
4531 }
4532
4533 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4534 {
4535     const struct wined3d_device *device = surface->resource.device;
4536     const struct wined3d_palette *pal = surface->palette;
4537     BOOL index_in_alpha = FALSE;
4538     unsigned int i;
4539
4540     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4541      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4542      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4543      * duplicate entries. Store the color key in the unused alpha component to speed the
4544      * download up and to make conversion unneeded. */
4545     index_in_alpha = primary_render_target_is_p8(device);
4546
4547     if (!pal)
4548     {
4549         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4550         if (index_in_alpha)
4551         {
4552             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4553              * there's no palette at this time. */
4554             for (i = 0; i < 256; i++) table[i][3] = i;
4555         }
4556     }
4557     else
4558     {
4559         TRACE("Using surface palette %p\n", pal);
4560         /* Get the surface's palette */
4561         for (i = 0; i < 256; ++i)
4562         {
4563             table[i][0] = pal->palents[i].peRed;
4564             table[i][1] = pal->palents[i].peGreen;
4565             table[i][2] = pal->palents[i].peBlue;
4566
4567             /* When index_in_alpha is set the palette index is stored in the
4568              * alpha component. In case of a readback we can then read
4569              * GL_ALPHA. Color keying is handled in BltOverride using a
4570              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4571              * color key itself is passed to glAlphaFunc in other cases the
4572              * alpha component of pixels that should be masked away is set to 0. */
4573             if (index_in_alpha)
4574             {
4575                 table[i][3] = i;
4576             }
4577             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4578                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4579             {
4580                 table[i][3] = 0x00;
4581             }
4582             else if (pal->flags & WINEDDPCAPS_ALPHA)
4583             {
4584                 table[i][3] = pal->palents[i].peFlags;
4585             }
4586             else
4587             {
4588                 table[i][3] = 0xFF;
4589             }
4590         }
4591     }
4592 }
4593
4594 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4595         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4596 {
4597     const BYTE *source;
4598     BYTE *dest;
4599     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4600
4601     switch (convert) {
4602         case NO_CONVERSION:
4603         {
4604             memcpy(dst, src, pitch * height);
4605             break;
4606         }
4607         case CONVERT_PALETTED:
4608         case CONVERT_PALETTED_CK:
4609         {
4610             BYTE table[256][4];
4611             unsigned int x, y;
4612
4613             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4614
4615             for (y = 0; y < height; y++)
4616             {
4617                 source = src + pitch * y;
4618                 dest = dst + outpitch * y;
4619                 /* This is an 1 bpp format, using the width here is fine */
4620                 for (x = 0; x < width; x++) {
4621                     BYTE color = *source++;
4622                     *dest++ = table[color][0];
4623                     *dest++ = table[color][1];
4624                     *dest++ = table[color][2];
4625                     *dest++ = table[color][3];
4626                 }
4627             }
4628         }
4629         break;
4630
4631         case CONVERT_CK_565:
4632         {
4633             /* Converting the 565 format in 5551 packed to emulate color-keying.
4634
4635               Note : in all these conversion, it would be best to average the averaging
4636                       pixels to get the color of the pixel that will be color-keyed to
4637                       prevent 'color bleeding'. This will be done later on if ever it is
4638                       too visible.
4639
4640               Note2: Nvidia documents say that their driver does not support alpha + color keying
4641                      on the same surface and disables color keying in such a case
4642             */
4643             unsigned int x, y;
4644             const WORD *Source;
4645             WORD *Dest;
4646
4647             TRACE("Color keyed 565\n");
4648
4649             for (y = 0; y < height; y++) {
4650                 Source = (const WORD *)(src + y * pitch);
4651                 Dest = (WORD *) (dst + y * outpitch);
4652                 for (x = 0; x < width; x++ ) {
4653                     WORD color = *Source++;
4654                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4655                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4656                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4657                         *Dest |= 0x0001;
4658                     Dest++;
4659                 }
4660             }
4661         }
4662         break;
4663
4664         case CONVERT_CK_5551:
4665         {
4666             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4667             unsigned int x, y;
4668             const WORD *Source;
4669             WORD *Dest;
4670             TRACE("Color keyed 5551\n");
4671             for (y = 0; y < height; y++) {
4672                 Source = (const WORD *)(src + y * pitch);
4673                 Dest = (WORD *) (dst + y * outpitch);
4674                 for (x = 0; x < width; x++ ) {
4675                     WORD color = *Source++;
4676                     *Dest = color;
4677                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4678                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4679                         *Dest |= (1 << 15);
4680                     else
4681                         *Dest &= ~(1 << 15);
4682                     Dest++;
4683                 }
4684             }
4685         }
4686         break;
4687
4688         case CONVERT_CK_RGB24:
4689         {
4690             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4691             unsigned int x, y;
4692             for (y = 0; y < height; y++)
4693             {
4694                 source = src + pitch * y;
4695                 dest = dst + outpitch * y;
4696                 for (x = 0; x < width; x++) {
4697                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4698                     DWORD dstcolor = color << 8;
4699                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4700                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4701                         dstcolor |= 0xff;
4702                     *(DWORD*)dest = dstcolor;
4703                     source += 3;
4704                     dest += 4;
4705                 }
4706             }
4707         }
4708         break;
4709
4710         case CONVERT_RGB32_888:
4711         {
4712             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4713             unsigned int x, y;
4714             for (y = 0; y < height; y++)
4715             {
4716                 source = src + pitch * y;
4717                 dest = dst + outpitch * y;
4718                 for (x = 0; x < width; x++) {
4719                     DWORD color = 0xffffff & *(const DWORD*)source;
4720                     DWORD dstcolor = color << 8;
4721                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4722                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4723                         dstcolor |= 0xff;
4724                     *(DWORD*)dest = dstcolor;
4725                     source += 4;
4726                     dest += 4;
4727                 }
4728             }
4729         }
4730         break;
4731
4732         default:
4733             ERR("Unsupported conversion type %#x.\n", convert);
4734     }
4735     return WINED3D_OK;
4736 }
4737
4738 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4739 {
4740     /* Flip the surface contents */
4741     /* Flip the DC */
4742     {
4743         HDC tmp;
4744         tmp = front->hDC;
4745         front->hDC = back->hDC;
4746         back->hDC = tmp;
4747     }
4748
4749     /* Flip the DIBsection */
4750     {
4751         HBITMAP tmp;
4752         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4753         tmp = front->dib.DIBsection;
4754         front->dib.DIBsection = back->dib.DIBsection;
4755         back->dib.DIBsection = tmp;
4756
4757         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4758         else front->flags &= ~SFLAG_DIBSECTION;
4759         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4760         else back->flags &= ~SFLAG_DIBSECTION;
4761     }
4762
4763     /* Flip the surface data */
4764     {
4765         void* tmp;
4766
4767         tmp = front->dib.bitmap_data;
4768         front->dib.bitmap_data = back->dib.bitmap_data;
4769         back->dib.bitmap_data = tmp;
4770
4771         tmp = front->resource.allocatedMemory;
4772         front->resource.allocatedMemory = back->resource.allocatedMemory;
4773         back->resource.allocatedMemory = tmp;
4774
4775         tmp = front->resource.heapMemory;
4776         front->resource.heapMemory = back->resource.heapMemory;
4777         back->resource.heapMemory = tmp;
4778     }
4779
4780     /* Flip the PBO */
4781     {
4782         GLuint tmp_pbo = front->pbo;
4783         front->pbo = back->pbo;
4784         back->pbo = tmp_pbo;
4785     }
4786
4787     /* Flip the opengl texture */
4788     {
4789         GLuint tmp;
4790
4791         tmp = back->texture_name;
4792         back->texture_name = front->texture_name;
4793         front->texture_name = tmp;
4794
4795         tmp = back->texture_name_srgb;
4796         back->texture_name_srgb = front->texture_name_srgb;
4797         front->texture_name_srgb = tmp;
4798
4799         tmp = back->rb_multisample;
4800         back->rb_multisample = front->rb_multisample;
4801         front->rb_multisample = tmp;
4802
4803         tmp = back->rb_resolved;
4804         back->rb_resolved = front->rb_resolved;
4805         front->rb_resolved = tmp;
4806
4807         resource_unload(&back->resource);
4808         resource_unload(&front->resource);
4809     }
4810
4811     {
4812         DWORD tmp_flags = back->flags;
4813         back->flags = front->flags;
4814         front->flags = tmp_flags;
4815     }
4816 }
4817
4818 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4819  * pixel copy calls.
      *
      * The pixels are read with glCopyTexSubImage2D() from the GL read buffer that
      * belongs to src_surface and written into the texture of dst_surface.
      *
      * dst_surface: surface whose texture_name / texture_target / texture_level
      *              receive the pixels; it is preloaded before the copy.
      * src_surface: surface used to acquire the context and to pick the read buffer.
      * src_rect:    rectangle to read.
      * dst_rect_in: rectangle to write; top > bottom is accepted, in which case a
      *              local copy is reordered and the copy is flipped.
      * Filter:      only WINED3DTEXF_NONE and WINED3DTEXF_POINT are honoured; other
      *              values merely produce an ERR when stretching is required.
      *
      * On return dst_surface has been passed to surface_modify_location() with
      * SFLAG_INTEXTURE. */
4820 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4821         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4822 {
4823     struct wined3d_device *device = dst_surface->resource.device;
4824     float xrel, yrel;
4825     UINT row;
4826     struct wined3d_context *context;
4827     BOOL upsidedown = FALSE;
4828     RECT dst_rect = *dst_rect_in;
4829
4830     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4831      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4832      */
4833     if(dst_rect.top > dst_rect.bottom) {
4834         UINT tmp = dst_rect.bottom;
4835         dst_rect.bottom = dst_rect.top;
4836         dst_rect.top = tmp;
4837         upsidedown = TRUE;
4838     }
4839
         /* The context is acquired for the source surface because that is where the pixels are read from. */
4840     context = context_acquire(device, src_surface);
4841     context_apply_blit_state(context, device);
4842     surface_internal_preload(dst_surface, SRGB_RGB);
4843     ENTER_GL();
4844
4845     /* Bind the target texture */
4846     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4847     if (surface_is_offscreen(src_surface))
4848     {
4849         TRACE("Reading from an offscreen target\n");
             /* An offscreen source inverts the flip decision made above. */
4850         upsidedown = !upsidedown;
4851         glReadBuffer(device->offscreenBuffer);
4852     }
4853     else
4854     {
4855         glReadBuffer(surface_get_gl_buffer(src_surface));
4856     }
4857     checkGLcall("glReadBuffer");
4858
         /* Source size divided by destination size on each axis. A value within eps of 1.0 is
          * treated as "not stretched" below; eps is defined outside this function. */
4859     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4860     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4861
4862     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4863     {
4864         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4865
4866         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4867             ERR("Texture filtering not supported in direct blit\n");
4868         }
4869     }
4870     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4871             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4872     {
4873         ERR("Texture filtering not supported in direct blit\n");
4874     }
4875
4876     if (upsidedown
4877             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4878             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4879     {
4880         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4881
4882         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4883                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4884                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4885                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4886     }
4887     else
4888     {
             /* Framebuffer row that corresponds to the first destination row; rows are then read
              * downwards from here as the destination row index grows. */
4889         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4890         /* I have to process this row by row to swap the image,
4891          * otherwise it would be upside down, so stretching in y direction
4892          * doesn't cost extra time
4893          *
4894          * However, stretching in x direction can be avoided if not necessary
4895          */
4896         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4897             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4898             {
4899                 /* Well, that stuff works, but it's very slow.
4900                  * find a better way instead
4901                  */
4902                 UINT col;
4903
                     /* NOTE(review): col already starts at dst_rect.left, yet the x offset below is
                      * dst_rect.left + col and the source x is src_rect->left + col * xrel. For a
                      * destination rectangle with left != 0 this looks like a double offset - confirm. */
4904                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4905                 {
4906                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4907                             dst_rect.left + col /* x offset */, row /* y offset */,
4908                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4909                 }
4910             }
4911             else
4912             {
                     /* No x stretching: one call copies the whole destination row. */
4913                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4914                         dst_rect.left /* x offset */, row /* y offset */,
4915                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4916             }
4917         }
4918     }
4919     checkGLcall("glCopyTexSubImage2D");
4920
4921     LEAVE_GL();
4922     context_release(context);
4923
4924     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4925      * path is never entered
4926      */
4927     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4928 }
4929
4930 /* Uses the hardware to stretch and flip the image
      *
      * Outline of what the code below does:
      *  1. Copy the whole read buffer of src_surface (fbwidth x fbheight) into a
      *     texture: a temporary one ("backup") when the source is offscreen and
      *     FBOs are the offscreen rendering mode, otherwise the texture of
      *     src_surface itself.
      *  2. If src_surface belongs to a swapchain and is not its first back buffer,
      *     additionally copy GL_FRONT into a second temporary texture ("src").
      *  3. Draw the currently bound texture as a quad of the destination size at
      *     the origin of drawBuffer (GL_AUX1, GL_AUX0 or GL_BACK).
      *  4. Copy that quad from drawBuffer into the texture of dst_surface at
      *     (dst_rect.left, dst_rect.top).
      *  5. If GL_BACK was used as scratch space, draw the copy from step 1 back
      *     over it at full size.
      *  6. Delete the temporary textures and mark dst_surface SFLAG_INTEXTURE.
      *
      * dst_rect_in may have top > bottom; a local copy is reordered and the image
      * is flipped instead. Filter selects the mag/min filter used for the texture
      * bound in step 1. */
4931 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4932         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4933 {
4934     struct wined3d_device *device = dst_surface->resource.device;
4935     struct wined3d_swapchain *src_swapchain = NULL;
4936     GLuint src, backup = 0;
4937     float left, right, top, bottom; /* Texture coordinates */
4938     UINT fbwidth = src_surface->resource.width;
4939     UINT fbheight = src_surface->resource.height;
4940     struct wined3d_context *context;
4941     GLenum drawBuffer = GL_BACK;
4942     GLenum texture_target;
4943     BOOL noBackBufferBackup;
4944     BOOL src_offscreen;
4945     BOOL upsidedown = FALSE;
4946     RECT dst_rect = *dst_rect_in;
4947
4948     TRACE("Using hwstretch blit\n");
4949     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4950     context = context_acquire(device, src_surface);
4951     context_apply_blit_state(context, device);
4952     surface_internal_preload(dst_surface, SRGB_RGB);
4953
4954     src_offscreen = surface_is_offscreen(src_surface);
4955     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4956     if (!noBackBufferBackup && !src_surface->texture_name)
4957     {
4958         /* Get it a description */
4959         surface_internal_preload(src_surface, SRGB_RGB);
4960     }
4961     ENTER_GL();
4962
4963     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4964      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4965      */
4966     if (context->aux_buffers >= 2)
4967     {
4968         /* Got more than one aux buffer? Use the 2nd aux buffer */
4969         drawBuffer = GL_AUX1;
4970     }
4971     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4972     {
4973         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4974         drawBuffer = GL_AUX0;
4975     }
4976
4977     if(noBackBufferBackup) {
             /* The read buffer contents go into a freshly generated 2D texture instead of the
              * texture of src_surface. */
4978         glGenTextures(1, &backup);
4979         checkGLcall("glGenTextures");
4980         context_bind_texture(context, GL_TEXTURE_2D, backup);
4981         texture_target = GL_TEXTURE_2D;
4982     } else {
4983         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4984          * we are reading from the back buffer, the backup can be used as source texture
4985          */
4986         texture_target = src_surface->texture_target;
4987         context_bind_texture(context, texture_target, src_surface->texture_name);
4988         glEnable(texture_target);
4989         checkGLcall("glEnable(texture_target)");
4990
4991         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4992         src_surface->flags &= ~SFLAG_INTEXTURE;
4993     }
4994
4995     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4996      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4997      */
4998     if(dst_rect.top > dst_rect.bottom) {
4999         UINT tmp = dst_rect.bottom;
5000         dst_rect.bottom = dst_rect.top;
5001         dst_rect.top = tmp;
5002         upsidedown = TRUE;
5003     }
5004
5005     if (src_offscreen)
5006     {
5007         TRACE("Reading from an offscreen target\n");
             /* An offscreen source inverts the flip decision made above. */
5008         upsidedown = !upsidedown;
5009         glReadBuffer(device->offscreenBuffer);
5010     }
5011     else
5012     {
5013         glReadBuffer(surface_get_gl_buffer(src_surface));
5014     }
5015
5016     /* TODO: Only back up the part that will be overwritten */
5017     glCopyTexSubImage2D(texture_target, 0,
5018                         0, 0 /* read offsets */,
5019                         0, 0,
5020                         fbwidth,
5021                         fbheight);
5022
5023     checkGLcall("glCopyTexSubImage2D");
5024
5025     /* No issue with overriding these - the sampler is dirty due to blit usage */
5026     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5027             wined3d_gl_mag_filter(magLookup, Filter));
5028     checkGLcall("glTexParameteri");
5029     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5030             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5031     checkGLcall("glTexParameteri");
5032
5033     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5034         src_swapchain = src_surface->container.u.swapchain;
5035     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5036     {
             /* The texture filled above is still bound and is what the quad below samples; src is
              * only recorded so that the cleanup code can tell whether it has to be deleted. */
5037         src = backup ? backup : src_surface->texture_name;
5038     }
5039     else
5040     {
             /* Source is a swapchain surface other than back_buffers[0]: read GL_FRONT into a
              * second temporary texture, which stays bound for the quad below. */
5041         glReadBuffer(GL_FRONT);
5042         checkGLcall("glReadBuffer(GL_FRONT)");
5043
5044         glGenTextures(1, &src);
5045         checkGLcall("glGenTextures(1, &src)");
5046         context_bind_texture(context, GL_TEXTURE_2D, src);
5047
5048         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5049          * out for power of 2 sizes
5050          */
5051         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5052                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5053         checkGLcall("glTexImage2D");
5054         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5055                             0, 0 /* read offsets */,
5056                             0, 0,
5057                             fbwidth,
5058                             fbheight);
5059
5060         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5061         checkGLcall("glTexParameteri");
5062         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5063         checkGLcall("glTexParameteri");
5064
5065         glReadBuffer(GL_BACK);
5066         checkGLcall("glReadBuffer(GL_BACK)");
5067
5068         if(texture_target != GL_TEXTURE_2D) {
5069             glDisable(texture_target);
5070             glEnable(GL_TEXTURE_2D);
5071             texture_target = GL_TEXTURE_2D;
5072         }
5073     }
         /* NOTE(review): the message mentions glEnd although no glBegin/glEnd pair has been issued
          * by this function at this point. */
5074     checkGLcall("glEnd and previous");
5075
5076     left = src_rect->left;
5077     right = src_rect->right;
5078
         /* Convert the source rows to GL's bottom-left origin; exchanging top and bottom flips the image. */
5079     if (!upsidedown)
5080     {
5081         top = src_surface->resource.height - src_rect->top;
5082         bottom = src_surface->resource.height - src_rect->bottom;
5083     }
5084     else
5085     {
5086         top = src_surface->resource.height - src_rect->bottom;
5087         bottom = src_surface->resource.height - src_rect->top;
5088     }
5089
         /* With SFLAG_NORMCOORD the texel coordinates are scaled by the power-of-two texture size. */
5090     if (src_surface->flags & SFLAG_NORMCOORD)
5091     {
5092         left /= src_surface->pow2Width;
5093         right /= src_surface->pow2Width;
5094         top /= src_surface->pow2Height;
5095         bottom /= src_surface->pow2Height;
5096     }
5097
5098     /* draw the source texture stretched and upside down. The correct surface is bound already */
5099     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5100     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5101
5102     context_set_draw_buffer(context, drawBuffer);
5103     glReadBuffer(drawBuffer);
5104
5105     glBegin(GL_QUADS);
5106         /* bottom left */
5107         glTexCoord2f(left, bottom);
5108         glVertex2i(0, 0);
5109
5110         /* top left */
5111         glTexCoord2f(left, top);
5112         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5113
5114         /* top right */
5115         glTexCoord2f(right, top);
5116         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5117
5118         /* bottom right */
5119         glTexCoord2f(right, bottom);
5120         glVertex2i(dst_rect.right - dst_rect.left, 0);
5121     glEnd();
5122     checkGLcall("glEnd and previous");
5123
5124     if (texture_target != dst_surface->texture_target)
5125     {
5126         glDisable(texture_target);
5127         glEnable(dst_surface->texture_target);
5128         texture_target = dst_surface->texture_target;
5129     }
5130
5131     /* Now read the stretched and upside down image into the destination texture */
         /* NOTE(review): the mip level is hard-coded to 0 here, whereas fb_copy_to_texture_direct()
          * passes dst_surface->texture_level - confirm that this is intended. */
5132     context_bind_texture(context, texture_target, dst_surface->texture_name);
5133     glCopyTexSubImage2D(texture_target,
5134                         0,
5135                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5136                         0, 0, /* We blitted the image to the origin */
5137                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5138     checkGLcall("glCopyTexSubImage2D");
5139
5140     if(drawBuffer == GL_BACK) {
5141         /* Write the back buffer backup back */
5142         if(backup) {
5143             if(texture_target != GL_TEXTURE_2D) {
5144                 glDisable(texture_target);
5145                 glEnable(GL_TEXTURE_2D);
5146                 texture_target = GL_TEXTURE_2D;
5147             }
5148             context_bind_texture(context, GL_TEXTURE_2D, backup);
5149         }
5150         else
5151         {
5152             if (texture_target != src_surface->texture_target)
5153             {
5154                 glDisable(texture_target);
5155                 glEnable(src_surface->texture_target);
5156                 texture_target = src_surface->texture_target;
5157             }
5158             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5159         }
5160
             /* Full-size quad; the texture coordinates are always divided by the power-of-two
              * size here, independent of SFLAG_NORMCOORD. */
5161         glBegin(GL_QUADS);
5162             /* top left */
5163             glTexCoord2f(0.0f, 0.0f);
5164             glVertex2i(0, fbheight);
5165
5166             /* bottom left */
5167             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5168             glVertex2i(0, 0);
5169
5170             /* bottom right */
5171             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5172                     (float)fbheight / (float)src_surface->pow2Height);
5173             glVertex2i(fbwidth, 0);
5174
5175             /* top right */
5176             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5177             glVertex2i(fbwidth, fbheight);
5178         glEnd();
5179     }
5180     glDisable(texture_target);
5181     checkGLcall("glDisable(texture_target)");
5182
5183     /* Cleanup */
         /* src is deleted only when it is the temporary texture created for the GL_FRONT copy. */
5184     if (src != src_surface->texture_name && src != backup)
5185     {
5186         glDeleteTextures(1, &src);
5187         checkGLcall("glDeleteTextures(1, &src)");
5188     }
5189     if(backup) {
5190         glDeleteTextures(1, &backup);
5191         checkGLcall("glDeleteTextures(1, &backup)");
5192     }
5193
5194     LEAVE_GL();
5195
5196     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5197
5198     context_release(context);
5199
5200     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5201      * path is never entered
5202      */
5203     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5204 }
5205
5206 /* Front buffer coordinates are always full screen coordinates, but our GL
5207  * drawable is limited to the window's client area. The sysmem and texture
5208  * copies do have the full screen size. Note that GL has a bottom-left
5209  * origin, while D3D has a top-left origin. */
5210 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5211 {
5212     UINT drawable_height;
5213
5214     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5215             && surface == surface->container.u.swapchain->front_buffer)
5216     {
5217         POINT offset = {0, 0};
5218         RECT windowsize;
5219
5220         ScreenToClient(window, &offset);
5221         OffsetRect(rect, offset.x, offset.y);
5222
5223         GetClientRect(window, &windowsize);
5224         drawable_height = windowsize.bottom - windowsize.top;
5225     }
5226     else
5227     {
5228         drawable_height = surface->resource.height;
5229     }
5230
5231     rect->top = drawable_height - rect->top;
5232     rect->bottom = drawable_height - rect->bottom;
5233 }
5234
5235 static void surface_blt_to_drawable(const struct wined3d_device *device,
5236         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5237         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5238         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5239 {
5240     struct wined3d_context *context;
5241     RECT src_rect, dst_rect;
5242
5243     src_rect = *src_rect_in;
5244     dst_rect = *dst_rect_in;
5245
5246     /* Make sure the surface is up-to-date. This should probably use
5247      * surface_load_location() and worry about the destination surface too,
5248      * unless we're overwriting it completely. */
5249     surface_internal_preload(src_surface, SRGB_RGB);
5250
5251     /* Activate the destination context, set it up for blitting */
5252     context = context_acquire(device, dst_surface);
5253     context_apply_blit_state(context, device);
5254
5255     if (!surface_is_offscreen(dst_surface))
5256         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5257
5258     device->blitter->set_shader(device->blit_priv, context, src_surface);
5259
5260     ENTER_GL();
5261
5262     if (color_key)
5263     {
5264         glEnable(GL_ALPHA_TEST);
5265         checkGLcall("glEnable(GL_ALPHA_TEST)");
5266
5267         /* When the primary render target uses P8, the alpha component
5268          * contains the palette index. Which means that the colorkey is one of
5269          * the palette entries. In other cases pixels that should be masked
5270          * away have alpha set to 0. */
5271         if (primary_render_target_is_p8(device))
5272             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5273         else
5274             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5275         checkGLcall("glAlphaFunc");
5276     }
5277     else
5278     {
5279         glDisable(GL_ALPHA_TEST);
5280         checkGLcall("glDisable(GL_ALPHA_TEST)");
5281     }
5282
5283     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5284
5285     if (color_key)
5286     {
5287         glDisable(GL_ALPHA_TEST);
5288         checkGLcall("glDisable(GL_ALPHA_TEST)");
5289     }
5290
5291     LEAVE_GL();
5292
5293     /* Leave the opengl state valid for blitting */
5294     device->blitter->unset_shader(context->gl_info);
5295
5296     if (wined3d_settings.strict_draw_ordering
5297             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5298             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5299         wglFlush(); /* Flush to ensure ordering across contexts. */
5300
5301     context_release(context);
5302 }
5303
5304 /* Do not call while under the GL lock. */
5305 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5306 {
5307     struct wined3d_device *device = s->resource.device;
5308     const struct blit_shader *blitter;
5309
5310     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5311             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5312     if (!blitter)
5313     {
5314         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5315         return WINED3DERR_INVALIDCALL;
5316     }
5317
5318     return blitter->color_fill(device, s, rect, color);
5319 }
5320
5321 /* Do not call while under the GL lock. */
5322 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5323         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5324         WINED3DTEXTUREFILTERTYPE Filter)
5325 {
5326     struct wined3d_device *device = dst_surface->resource.device;
5327     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5328     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5329
5330     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5331             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5332             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5333
5334     /* Get the swapchain. One of the surfaces has to be a primary surface */
5335     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5336     {
5337         WARN("Destination is in sysmem, rejecting gl blt\n");
5338         return WINED3DERR_INVALIDCALL;
5339     }
5340
5341     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5342         dstSwapchain = dst_surface->container.u.swapchain;
5343
5344     if (src_surface)
5345     {
5346         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5347         {
5348             WARN("Src is in sysmem, rejecting gl blt\n");
5349             return WINED3DERR_INVALIDCALL;
5350         }
5351
5352         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5353             srcSwapchain = src_surface->container.u.swapchain;
5354     }
5355
5356     /* Early sort out of cases where no render target is used */
5357     if (!dstSwapchain && !srcSwapchain
5358             && src_surface != device->fb.render_targets[0]
5359             && dst_surface != device->fb.render_targets[0])
5360     {
5361         TRACE("No surface is render target, not using hardware blit.\n");
5362         return WINED3DERR_INVALIDCALL;
5363     }
5364
5365     /* No destination color keying supported */
5366     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5367     {
5368         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5369         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5370         return WINED3DERR_INVALIDCALL;
5371     }
5372
5373     if (dstSwapchain && dstSwapchain == srcSwapchain)
5374     {
5375         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5376         return WINED3DERR_INVALIDCALL;
5377     }
5378
5379     if (dstSwapchain && srcSwapchain)
5380     {
5381         FIXME("Implement hardware blit between two different swapchains\n");
5382         return WINED3DERR_INVALIDCALL;
5383     }
5384
5385     if (dstSwapchain)
5386     {
5387         /* Handled with regular texture -> swapchain blit */
5388         if (src_surface == device->fb.render_targets[0])
5389             TRACE("Blit from active render target to a swapchain\n");
5390     }
5391     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5392     {
5393         FIXME("Implement blit from a swapchain to the active render target\n");
5394         return WINED3DERR_INVALIDCALL;
5395     }
5396
5397     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5398     {
5399         /* Blit from render target to texture */
5400         BOOL stretchx;
5401
5402         /* P8 read back is not implemented */
5403         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5404                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5405         {
5406             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5407             return WINED3DERR_INVALIDCALL;
5408         }
5409
5410         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5411         {
5412             TRACE("Color keying not supported by frame buffer to texture blit\n");
5413             return WINED3DERR_INVALIDCALL;
5414             /* Destination color key is checked above */
5415         }
5416
5417         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5418             stretchx = TRUE;
5419         else
5420             stretchx = FALSE;
5421
5422         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5423          * flip the image nor scale it.
5424          *
5425          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5426          * -> If the app wants a image width an unscaled width, copy it line per line
5427          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5428          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5429          *    back buffer. This is slower than reading line per line, thus not used for flipping
5430          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5431          *    pixel by pixel. */
5432         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5433                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5434         {
5435             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5436             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5437         } else {
5438             TRACE("Using hardware stretching to flip / stretch the texture\n");
5439             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5440         }
5441
5442         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5443         {
5444             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5445             dst_surface->resource.allocatedMemory = NULL;
5446             dst_surface->resource.heapMemory = NULL;
5447         }
5448         else
5449         {
5450             dst_surface->flags &= ~SFLAG_INSYSMEM;
5451         }
5452
5453         return WINED3D_OK;
5454     }
5455     else if (src_surface)
5456     {
5457         /* Blit from offscreen surface to render target */
5458         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5459         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5460
5461         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5462
5463         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5464                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5465                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5466         {
5467             FIXME("Unsupported blit operation falling back to software\n");
5468             return WINED3DERR_INVALIDCALL;
5469         }
5470
5471         /* Color keying: Check if we have to do a color keyed blt,
5472          * and if not check if a color key is activated.
5473          *
5474          * Just modify the color keying parameters in the surface and restore them afterwards
5475          * The surface keeps track of the color key last used to load the opengl surface.
5476          * PreLoad will catch the change to the flags and color key and reload if necessary.
5477          */
5478         if (flags & WINEDDBLT_KEYSRC)
5479         {
5480             /* Use color key from surface */
5481         }
5482         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5483         {
5484             /* Use color key from DDBltFx */
5485             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5486             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5487         }
5488         else
5489         {
5490             /* Do not use color key */
5491             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5492         }
5493
5494         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5495                 src_surface, src_rect, dst_surface, dst_rect);
5496
5497         /* Restore the color key parameters */
5498         src_surface->CKeyFlags = oldCKeyFlags;
5499         src_surface->SrcBltCKey = oldBltCKey;
5500
5501         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5502
5503         return WINED3D_OK;
5504     }
5505
5506     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5507     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5508     return WINED3DERR_INVALIDCALL;
5509 }
5510
5511 /* GL locking is done by the caller */
5512 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5513         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5514 {
5515     struct wined3d_device *device = surface->resource.device;
5516     const struct wined3d_gl_info *gl_info = context->gl_info;
5517     GLint compare_mode = GL_NONE;
5518     struct blt_info info;
5519     GLint old_binding = 0;
5520     RECT rect;
5521
5522     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5523
5524     glDisable(GL_CULL_FACE);
5525     glDisable(GL_BLEND);
5526     glDisable(GL_ALPHA_TEST);
5527     glDisable(GL_SCISSOR_TEST);
5528     glDisable(GL_STENCIL_TEST);
5529     glEnable(GL_DEPTH_TEST);
5530     glDepthFunc(GL_ALWAYS);
5531     glDepthMask(GL_TRUE);
5532     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5533     glViewport(x, y, w, h);
5534
5535     SetRect(&rect, 0, h, w, 0);
5536     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5537     context_active_texture(context, context->gl_info, 0);
5538     glGetIntegerv(info.binding, &old_binding);
5539     glBindTexture(info.bind_target, texture);
5540     if (gl_info->supported[ARB_SHADOW])
5541     {
5542         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5543         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5544     }
5545
5546     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5547             gl_info, info.tex_type, &surface->ds_current_size);
5548
5549     glBegin(GL_TRIANGLE_STRIP);
5550     glTexCoord3fv(info.coords[0]);
5551     glVertex2f(-1.0f, -1.0f);
5552     glTexCoord3fv(info.coords[1]);
5553     glVertex2f(1.0f, -1.0f);
5554     glTexCoord3fv(info.coords[2]);
5555     glVertex2f(-1.0f, 1.0f);
5556     glTexCoord3fv(info.coords[3]);
5557     glVertex2f(1.0f, 1.0f);
5558     glEnd();
5559
5560     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5561     glBindTexture(info.bind_target, old_binding);
5562
5563     glPopAttrib();
5564
5565     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5566 }
5567
5568 void surface_modify_ds_location(struct wined3d_surface *surface,
5569         DWORD location, UINT w, UINT h)
5570 {
5571     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5572
5573     if (location & ~SFLAG_DS_LOCATIONS)
5574         FIXME("Invalid location (%#x) specified.\n", location);
5575
5576     surface->ds_current_size.cx = w;
5577     surface->ds_current_size.cy = h;
5578     surface->flags &= ~SFLAG_DS_LOCATIONS;
5579     surface->flags |= location;
5580 }
5581
5582 /* Context activation is done by the caller. */
5583 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5584 {
5585     struct wined3d_device *device = surface->resource.device;
5586     GLsizei w, h;
5587
5588     TRACE("surface %p, new location %#x.\n", surface, location);
5589
5590     /* TODO: Make this work for modes other than FBO */
5591     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5592
5593     if (!(surface->flags & location))
5594     {
5595         w = surface->ds_current_size.cx;
5596         h = surface->ds_current_size.cy;
5597         surface->ds_current_size.cx = 0;
5598         surface->ds_current_size.cy = 0;
5599     }
5600     else
5601     {
5602         w = surface->resource.width;
5603         h = surface->resource.height;
5604     }
5605
5606     if (surface->ds_current_size.cx == surface->resource.width
5607             && surface->ds_current_size.cy == surface->resource.height)
5608     {
5609         TRACE("Location (%#x) is already up to date.\n", location);
5610         return;
5611     }
5612
5613     if (surface->current_renderbuffer)
5614     {
5615         FIXME("Not supported with fixed up depth stencil.\n");
5616         return;
5617     }
5618
5619     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5620     {
5621         /* This mostly happens when a depth / stencil is used without being
5622          * cleared first. In principle we could upload from sysmem, or
5623          * explicitly clear before first usage. For the moment there don't
5624          * appear to be a lot of applications depending on this, so a FIXME
5625          * should do. */
5626         FIXME("No up to date depth stencil location.\n");
5627         surface->flags |= location;
5628         surface->ds_current_size.cx = surface->resource.width;
5629         surface->ds_current_size.cy = surface->resource.height;
5630         return;
5631     }
5632
5633     if (location == SFLAG_DS_OFFSCREEN)
5634     {
5635         GLint old_binding = 0;
5636         GLenum bind_target;
5637
5638         /* The render target is allowed to be smaller than the depth/stencil
5639          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5640          * than the offscreen surface. Don't overwrite the offscreen surface
5641          * with undefined data. */
5642         w = min(w, context->swapchain->desc.backbuffer_width);
5643         h = min(h, context->swapchain->desc.backbuffer_height);
5644
5645         TRACE("Copying onscreen depth buffer to depth texture.\n");
5646
5647         ENTER_GL();
5648
5649         if (!device->depth_blt_texture)
5650         {
5651             glGenTextures(1, &device->depth_blt_texture);
5652         }
5653
5654         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5655          * directly on the FBO texture. That's because we need to flip. */
5656         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5657                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5658         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5659         {
5660             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5661             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5662         }
5663         else
5664         {
5665             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5666             bind_target = GL_TEXTURE_2D;
5667         }
5668         glBindTexture(bind_target, device->depth_blt_texture);
5669         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5670          * internal format, because the internal format might include stencil
5671          * data. In principle we should copy stencil data as well, but unless
5672          * the driver supports stencil export it's hard to do, and doesn't
5673          * seem to be needed in practice. If the hardware doesn't support
5674          * writing stencil data, the glCopyTexImage2D() call might trigger
5675          * software fallbacks. */
5676         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5677         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5678         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5679         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5680         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5681         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5682         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5683         glBindTexture(bind_target, old_binding);
5684
5685         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5686                 NULL, surface, SFLAG_INTEXTURE);
5687         context_set_draw_buffer(context, GL_NONE);
5688         glReadBuffer(GL_NONE);
5689
5690         /* Do the actual blit */
5691         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5692         checkGLcall("depth_blt");
5693
5694         context_invalidate_state(context, STATE_FRAMEBUFFER);
5695
5696         LEAVE_GL();
5697
5698         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5699     }
5700     else if (location == SFLAG_DS_ONSCREEN)
5701     {
5702         TRACE("Copying depth texture to onscreen depth buffer.\n");
5703
5704         ENTER_GL();
5705
5706         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5707                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5708         surface_depth_blt(surface, context, surface->texture_name,
5709                 0, surface->pow2Height - h, w, h, surface->texture_target);
5710         checkGLcall("depth_blt");
5711
5712         context_invalidate_state(context, STATE_FRAMEBUFFER);
5713
5714         LEAVE_GL();
5715
5716         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5717     }
5718     else
5719     {
5720         ERR("Invalid location (%#x) specified.\n", location);
5721     }
5722
5723     surface->flags |= location;
5724     surface->ds_current_size.cx = surface->resource.width;
5725     surface->ds_current_size.cy = surface->resource.height;
5726 }
5727
5728 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5729 {
5730     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5731     struct wined3d_surface *overlay;
5732
5733     TRACE("surface %p, location %s, persistent %#x.\n",
5734             surface, debug_surflocation(location), persistent);
5735
5736     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5737             && (location & SFLAG_INDRAWABLE))
5738         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5739
5740     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5741             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5742         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5743
5744     if (persistent)
5745     {
5746         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5747                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5748         {
5749             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5750             {
5751                 TRACE("Passing to container.\n");
5752                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5753             }
5754         }
5755         surface->flags &= ~SFLAG_LOCATIONS;
5756         surface->flags |= location;
5757
5758         /* Redraw emulated overlays, if any */
5759         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5760         {
5761             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5762             {
5763                 surface_draw_overlay(overlay);
5764             }
5765         }
5766     }
5767     else
5768     {
5769         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5770         {
5771             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5772             {
5773                 TRACE("Passing to container\n");
5774                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5775             }
5776         }
5777         surface->flags &= ~location;
5778     }
5779
5780     if (!(surface->flags & SFLAG_LOCATIONS))
5781     {
5782         ERR("Surface %p does not have any up to date location.\n", surface);
5783     }
5784 }
5785
5786 static DWORD resource_access_from_location(DWORD location)
5787 {
5788     switch (location)
5789     {
5790         case SFLAG_INSYSMEM:
5791             return WINED3D_RESOURCE_ACCESS_CPU;
5792
5793         case SFLAG_INDRAWABLE:
5794         case SFLAG_INSRGBTEX:
5795         case SFLAG_INTEXTURE:
5796         case SFLAG_INRB_MULTISAMPLE:
5797         case SFLAG_INRB_RESOLVED:
5798             return WINED3D_RESOURCE_ACCESS_GPU;
5799
5800         default:
5801             FIXME("Unhandled location %#x.\n", location);
5802             return 0;
5803     }
5804 }
5805
5806 static void surface_load_sysmem(struct wined3d_surface *surface,
5807         const struct wined3d_gl_info *gl_info, const RECT *rect)
5808 {
5809     surface_prepare_system_memory(surface);
5810
5811     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5812         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5813
5814     /* Download the surface to system memory. */
5815     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5816     {
5817         struct wined3d_device *device = surface->resource.device;
5818         struct wined3d_context *context;
5819
5820         /* TODO: Use already acquired context when possible. */
5821         context = context_acquire(device, NULL);
5822
5823         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5824         surface_download_data(surface, gl_info);
5825
5826         context_release(context);
5827
5828         return;
5829     }
5830
5831     if (surface->flags & SFLAG_INDRAWABLE)
5832     {
5833         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5834                 wined3d_surface_get_pitch(surface));
5835         return;
5836     }
5837
5838     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5839             surface, surface->flags & SFLAG_LOCATIONS);
5840 }
5841
5842 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5843         const struct wined3d_gl_info *gl_info, const RECT *rect)
5844 {
5845     struct wined3d_device *device = surface->resource.device;
5846     struct wined3d_format format;
5847     CONVERT_TYPES convert;
5848     UINT byte_count;
5849     BYTE *mem;
5850
5851     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5852     {
5853         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5854         return WINED3DERR_INVALIDCALL;
5855     }
5856
5857     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5858         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5859
5860     if (surface->flags & SFLAG_INTEXTURE)
5861     {
5862         RECT r;
5863
5864         surface_get_rect(surface, rect, &r);
5865         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5866
5867         return WINED3D_OK;
5868     }
5869
5870     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5871     {
5872         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5873          * path through sysmem. */
5874         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5875     }
5876
5877     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5878
5879     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5880      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5881      * called. */
5882     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5883     {
5884         struct wined3d_context *context;
5885
5886         TRACE("Removing the pbo attached to surface %p.\n", surface);
5887
5888         /* TODO: Use already acquired context when possible. */
5889         context = context_acquire(device, NULL);
5890
5891         surface_remove_pbo(surface, gl_info);
5892
5893         context_release(context);
5894     }
5895
5896     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5897     {
5898         UINT height = surface->resource.height;
5899         UINT width = surface->resource.width;
5900         UINT src_pitch, dst_pitch;
5901
5902         byte_count = format.conv_byte_count;
5903         src_pitch = wined3d_surface_get_pitch(surface);
5904
5905         /* Stick to the alignment for the converted surface too, makes it
5906          * easier to load the surface. */
5907         dst_pitch = width * byte_count;
5908         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5909
5910         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5911         {
5912             ERR("Out of memory (%u).\n", dst_pitch * height);
5913             return E_OUTOFMEMORY;
5914         }
5915
5916         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5917                 src_pitch, width, height, dst_pitch, convert, surface);
5918
5919         surface->flags |= SFLAG_CONVERTED;
5920     }
5921     else
5922     {
5923         surface->flags &= ~SFLAG_CONVERTED;
5924         mem = surface->resource.allocatedMemory;
5925         byte_count = format.byte_count;
5926     }
5927
5928     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5929
5930     /* Don't delete PBO memory. */
5931     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5932         HeapFree(GetProcessHeap(), 0, mem);
5933
5934     return WINED3D_OK;
5935 }
5936
5937 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5938         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5939 {
5940     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5941     struct wined3d_device *device = surface->resource.device;
5942     struct wined3d_context *context;
5943     UINT width, src_pitch, dst_pitch;
5944     struct wined3d_bo_address data;
5945     struct wined3d_format format;
5946     POINT dst_point = {0, 0};
5947     CONVERT_TYPES convert;
5948     BYTE *mem;
5949
5950     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5951             && surface_is_offscreen(surface)
5952             && (surface->flags & SFLAG_INDRAWABLE))
5953     {
5954         surface_load_fb_texture(surface, srgb);
5955
5956         return WINED3D_OK;
5957     }
5958
5959     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5960             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
5961             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5962                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5963                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5964     {
5965         if (srgb)
5966             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5967                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5968         else
5969             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5970                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5971
5972         return WINED3D_OK;
5973     }
5974
5975     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
5976             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
5977             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5978                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5979                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5980     {
5981         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
5982         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
5983         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
5984
5985         surface_blt_fbo(device, WINED3DTEXF_POINT, surface, src_location,
5986                 &rect, surface, dst_location, &rect);
5987
5988         return WINED3D_OK;
5989     }
5990
5991     /* Upload from system memory */
5992
5993     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
5994             TRUE /* We will use textures */, &format, &convert);
5995
5996     if (srgb)
5997     {
5998         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
5999         {
6000             /* Performance warning... */
6001             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6002             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6003         }
6004     }
6005     else
6006     {
6007         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6008         {
6009             /* Performance warning... */
6010             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6011             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6012         }
6013     }
6014
6015     if (!(surface->flags & SFLAG_INSYSMEM))
6016     {
6017         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6018         /* Lets hope we get it from somewhere... */
6019         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6020     }
6021
6022     /* TODO: Use already acquired context when possible. */
6023     context = context_acquire(device, NULL);
6024
6025     surface_prepare_texture(surface, context, srgb);
6026     surface_bind_and_dirtify(surface, context, srgb);
6027
6028     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6029     {
6030         surface->flags |= SFLAG_GLCKEY;
6031         surface->glCKey = surface->SrcBltCKey;
6032     }
6033     else surface->flags &= ~SFLAG_GLCKEY;
6034
6035     width = surface->resource.width;
6036     src_pitch = wined3d_surface_get_pitch(surface);
6037
6038     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6039      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6040      * called. */
6041     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6042     {
6043         TRACE("Removing the pbo attached to surface %p.\n", surface);
6044         surface_remove_pbo(surface, gl_info);
6045     }
6046
6047     if (format.convert)
6048     {
6049         /* This code is entered for texture formats which need a fixup. */
6050         UINT height = surface->resource.height;
6051
6052         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6053         dst_pitch = width * format.conv_byte_count;
6054         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6055
6056         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6057         {
6058             ERR("Out of memory (%u).\n", dst_pitch * height);
6059             context_release(context);
6060             return E_OUTOFMEMORY;
6061         }
6062         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6063     }
6064     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6065     {
6066         /* This code is only entered for color keying fixups */
6067         UINT height = surface->resource.height;
6068
6069         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6070         dst_pitch = width * format.conv_byte_count;
6071         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6072
6073         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6074         {
6075             ERR("Out of memory (%u).\n", dst_pitch * height);
6076             context_release(context);
6077             return E_OUTOFMEMORY;
6078         }
6079         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6080                 width, height, dst_pitch, convert, surface);
6081     }
6082     else
6083     {
6084         mem = surface->resource.allocatedMemory;
6085     }
6086
6087     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6088     data.addr = mem;
6089     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6090
6091     context_release(context);
6092
6093     /* Don't delete PBO memory. */
6094     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6095         HeapFree(GetProcessHeap(), 0, mem);
6096
6097     return WINED3D_OK;
6098 }
6099
6100 static void surface_multisample_resolve(struct wined3d_surface *surface)
6101 {
6102     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6103
6104     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6105         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6106
6107     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6108             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6109 }
6110
6111 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6112 {
6113     struct wined3d_device *device = surface->resource.device;
6114     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6115     HRESULT hr;
6116
6117     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6118
6119     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6120     {
6121         if (location == SFLAG_INTEXTURE)
6122         {
6123             struct wined3d_context *context = context_acquire(device, NULL);
6124             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6125             context_release(context);
6126             return WINED3D_OK;
6127         }
6128         else
6129         {
6130             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6131             return WINED3DERR_INVALIDCALL;
6132         }
6133     }
6134
6135     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6136         location = SFLAG_INTEXTURE;
6137
6138     if (surface->flags & location)
6139     {
6140         TRACE("Location already up to date.\n");
6141
6142         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6143                 && surface_need_pbo(surface, gl_info))
6144             surface_load_pbo(surface, gl_info);
6145
6146         return WINED3D_OK;
6147     }
6148
6149     if (WARN_ON(d3d_surface))
6150     {
6151         DWORD required_access = resource_access_from_location(location);
6152         if ((surface->resource.access_flags & required_access) != required_access)
6153             WARN("Operation requires %#x access, but surface only has %#x.\n",
6154                     required_access, surface->resource.access_flags);
6155     }
6156
6157     if (!(surface->flags & SFLAG_LOCATIONS))
6158     {
6159         ERR("Surface %p does not have any up to date location.\n", surface);
6160         surface->flags |= SFLAG_LOST;
6161         return WINED3DERR_DEVICELOST;
6162     }
6163
6164     switch (location)
6165     {
6166         case SFLAG_INSYSMEM:
6167             surface_load_sysmem(surface, gl_info, rect);
6168             break;
6169
6170         case SFLAG_INDRAWABLE:
6171             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6172                 return hr;
6173             break;
6174
6175         case SFLAG_INRB_RESOLVED:
6176             surface_multisample_resolve(surface);
6177             break;
6178
6179         case SFLAG_INTEXTURE:
6180         case SFLAG_INSRGBTEX:
6181             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6182                 return hr;
6183             break;
6184
6185         default:
6186             ERR("Don't know how to handle location %#x.\n", location);
6187             break;
6188     }
6189
6190     if (!rect)
6191     {
6192         surface->flags |= location;
6193
6194         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6195             surface_evict_sysmem(surface);
6196     }
6197
6198     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6199             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6200     {
6201         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6202     }
6203
6204     return WINED3D_OK;
6205 }
6206
6207 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6208 {
6209     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6210
6211     /* Not on a swapchain - must be offscreen */
6212     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6213
6214     /* The front buffer is always onscreen */
6215     if (surface == swapchain->front_buffer) return FALSE;
6216
6217     /* If the swapchain is rendered to an FBO, the backbuffer is
6218      * offscreen, otherwise onscreen */
6219     return swapchain->render_to_fbo;
6220 }
6221
6222 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6225
6226 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6227 /* Context activation is done by the caller. */
6228 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6229 {
6230     BYTE table[256][4];
6231     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6232
6233     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6234
6235     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6236     ENTER_GL();
6237     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6238     LEAVE_GL();
6239 }
6240
6241 /* Context activation is done by the caller. */
6242 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6243 {
6244     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6245
6246     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6247      * else the surface is converted in software at upload time in LoadLocation.
6248      */
6249     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6250             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6251         ffp_blit_p8_upload_palette(surface, context->gl_info);
6252
6253     ENTER_GL();
6254     glEnable(surface->texture_target);
6255     checkGLcall("glEnable(surface->texture_target)");
6256     LEAVE_GL();
6257     return WINED3D_OK;
6258 }
6259
6260 /* Context activation is done by the caller. */
6261 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6262 {
6263     ENTER_GL();
6264     glDisable(GL_TEXTURE_2D);
6265     checkGLcall("glDisable(GL_TEXTURE_2D)");
6266     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6267     {
6268         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6269         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6270     }
6271     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6272     {
6273         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6274         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6275     }
6276     LEAVE_GL();
6277 }
6278
6279 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6280         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6281         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6282 {
6283     enum complex_fixup src_fixup;
6284
6285     switch (blit_op)
6286     {
6287         case WINED3D_BLIT_OP_COLOR_BLIT:
6288             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6289                 return FALSE;
6290
6291             src_fixup = get_complex_fixup(src_format->color_fixup);
6292             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6293             {
6294                 TRACE("Checking support for fixup:\n");
6295                 dump_color_fixup_desc(src_format->color_fixup);
6296             }
6297
6298             if (!is_identity_fixup(dst_format->color_fixup))
6299             {
6300                 TRACE("Destination fixups are not supported\n");
6301                 return FALSE;
6302             }
6303
6304             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6305             {
6306                 TRACE("P8 fixup supported\n");
6307                 return TRUE;
6308             }
6309
6310             /* We only support identity conversions. */
6311             if (is_identity_fixup(src_format->color_fixup))
6312             {
6313                 TRACE("[OK]\n");
6314                 return TRUE;
6315             }
6316
6317             TRACE("[FAILED]\n");
6318             return FALSE;
6319
6320         case WINED3D_BLIT_OP_COLOR_FILL:
6321             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6322                 return FALSE;
6323
6324             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6325             {
6326                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6327                     return FALSE;
6328             }
6329             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6330             {
6331                 TRACE("Color fill not supported\n");
6332                 return FALSE;
6333             }
6334
6335             /* FIXME: We should reject color fills on formats with fixups,
6336              * but this would break P8 color fills for example. */
6337
6338             return TRUE;
6339
6340         case WINED3D_BLIT_OP_DEPTH_FILL:
6341             return TRUE;
6342
6343         default:
6344             TRACE("Unsupported blit_op=%d\n", blit_op);
6345             return FALSE;
6346     }
6347 }
6348
6349 /* Do not call while under the GL lock. */
6350 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6351         const RECT *dst_rect, const struct wined3d_color *color)
6352 {
6353     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6354     struct wined3d_fb_state fb = {&dst_surface, NULL};
6355
6356     return device_clear_render_targets(device, 1, &fb,
6357             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6358 }
6359
6360 /* Do not call while under the GL lock. */
6361 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6362         struct wined3d_surface *surface, const RECT *rect, float depth)
6363 {
6364     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6365     struct wined3d_fb_state fb = {NULL, surface};
6366
6367     return device_clear_render_targets(device, 0, &fb,
6368             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6369 }
6370
6371 const struct blit_shader ffp_blit =  {
6372     ffp_blit_alloc,
6373     ffp_blit_free,
6374     ffp_blit_set,
6375     ffp_blit_unset,
6376     ffp_blit_supported,
6377     ffp_blit_color_fill,
6378     ffp_blit_depth_fill,
6379 };
6380
6381 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6382 {
6383     return WINED3D_OK;
6384 }
6385
/* Nothing was allocated by cpu_blit_alloc(), so there is nothing to free.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    return;
}
6390
6391 /* Context activation is done by the caller. */
6392 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6393 {
6394     return WINED3D_OK;
6395 }
6396
/* Counterpart of cpu_blit_set(); there is no GL state to restore.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    return;
}
6401
6402 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6403         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6404         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6405 {
6406     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6407     {
6408         return TRUE;
6409     }
6410
6411     return FALSE;
6412 }
6413
6414 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6415         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6416         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6417 {
6418     UINT row_block_count;
6419     const BYTE *src_row;
6420     BYTE *dst_row;
6421     UINT x, y;
6422
6423     src_row = src_data;
6424     dst_row = dst_data;
6425
6426     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6427
6428     if (!flags)
6429     {
6430         for (y = 0; y < update_h; y += format->block_height)
6431         {
6432             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6433             src_row += src_pitch;
6434             dst_row += dst_pitch;
6435         }
6436
6437         return WINED3D_OK;
6438     }
6439
6440     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6441     {
6442         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6443
6444         switch (format->id)
6445         {
6446             case WINED3DFMT_DXT1:
6447                 for (y = 0; y < update_h; y += format->block_height)
6448                 {
6449                     struct block
6450                     {
6451                         WORD color[2];
6452                         BYTE control_row[4];
6453                     };
6454
6455                     const struct block *s = (const struct block *)src_row;
6456                     struct block *d = (struct block *)dst_row;
6457
6458                     for (x = 0; x < row_block_count; ++x)
6459                     {
6460                         d[x].color[0] = s[x].color[0];
6461                         d[x].color[1] = s[x].color[1];
6462                         d[x].control_row[0] = s[x].control_row[3];
6463                         d[x].control_row[1] = s[x].control_row[2];
6464                         d[x].control_row[2] = s[x].control_row[1];
6465                         d[x].control_row[3] = s[x].control_row[0];
6466                     }
6467                     src_row -= src_pitch;
6468                     dst_row += dst_pitch;
6469                 }
6470                 return WINED3D_OK;
6471
6472             case WINED3DFMT_DXT3:
6473                 for (y = 0; y < update_h; y += format->block_height)
6474                 {
6475                     struct block
6476                     {
6477                         WORD alpha_row[4];
6478                         WORD color[2];
6479                         BYTE control_row[4];
6480                     };
6481
6482                     const struct block *s = (const struct block *)src_row;
6483                     struct block *d = (struct block *)dst_row;
6484
6485                     for (x = 0; x < row_block_count; ++x)
6486                     {
6487                         d[x].alpha_row[0] = s[x].alpha_row[3];
6488                         d[x].alpha_row[1] = s[x].alpha_row[2];
6489                         d[x].alpha_row[2] = s[x].alpha_row[1];
6490                         d[x].alpha_row[3] = s[x].alpha_row[0];
6491                         d[x].color[0] = s[x].color[0];
6492                         d[x].color[1] = s[x].color[1];
6493                         d[x].control_row[0] = s[x].control_row[3];
6494                         d[x].control_row[1] = s[x].control_row[2];
6495                         d[x].control_row[2] = s[x].control_row[1];
6496                         d[x].control_row[3] = s[x].control_row[0];
6497                     }
6498                     src_row -= src_pitch;
6499                     dst_row += dst_pitch;
6500                 }
6501                 return WINED3D_OK;
6502
6503             default:
6504                 FIXME("Compressed flip not implemented for format %s.\n",
6505                         debug_d3dformat(format->id));
6506                 return E_NOTIMPL;
6507         }
6508     }
6509
6510     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6511             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6512
6513     return E_NOTIMPL;
6514 }
6515
6516 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6517         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6518         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6519 {
6520     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6521     const struct wined3d_format *src_format, *dst_format;
6522     struct wined3d_surface *orig_src = src_surface;
6523     struct wined3d_mapped_rect dst_map, src_map;
6524     HRESULT hr = WINED3D_OK;
6525     const BYTE *sbuf;
6526     RECT xdst,xsrc;
6527     BYTE *dbuf;
6528     int x, y;
6529
6530     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6531             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6532             flags, fx, debug_d3dtexturefiltertype(filter));
6533
6534     xsrc = *src_rect;
6535
6536     if (!src_surface)
6537     {
6538         RECT full_rect;
6539
6540         full_rect.left = 0;
6541         full_rect.top = 0;
6542         full_rect.right = dst_surface->resource.width;
6543         full_rect.bottom = dst_surface->resource.height;
6544         IntersectRect(&xdst, &full_rect, dst_rect);
6545     }
6546     else
6547     {
6548         BOOL clip_horiz, clip_vert;
6549
6550         xdst = *dst_rect;
6551         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6552         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6553
6554         if (clip_vert || clip_horiz)
6555         {
6556             /* Now check if this is a special case or not... */
6557             if ((flags & WINEDDBLT_DDFX)
6558                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6559                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6560             {
6561                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6562                 return WINED3D_OK;
6563             }
6564
6565             if (clip_horiz)
6566             {
6567                 if (xdst.left < 0)
6568                 {
6569                     xsrc.left -= xdst.left;
6570                     xdst.left = 0;
6571                 }
6572                 if (xdst.right > dst_surface->resource.width)
6573                 {
6574                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6575                     xdst.right = (int)dst_surface->resource.width;
6576                 }
6577             }
6578
6579             if (clip_vert)
6580             {
6581                 if (xdst.top < 0)
6582                 {
6583                     xsrc.top -= xdst.top;
6584                     xdst.top = 0;
6585                 }
6586                 if (xdst.bottom > dst_surface->resource.height)
6587                 {
6588                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6589                     xdst.bottom = (int)dst_surface->resource.height;
6590                 }
6591             }
6592
6593             /* And check if after clipping something is still to be done... */
6594             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6595                     || (xdst.left >= (int)dst_surface->resource.width)
6596                     || (xdst.top >= (int)dst_surface->resource.height)
6597                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6598                     || (xsrc.left >= (int)src_surface->resource.width)
6599                     || (xsrc.top >= (int)src_surface->resource.height))
6600             {
6601                 TRACE("Nothing to be done after clipping.\n");
6602                 return WINED3D_OK;
6603             }
6604         }
6605     }
6606
6607     if (src_surface == dst_surface)
6608     {
6609         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6610         src_map = dst_map;
6611         src_format = dst_surface->resource.format;
6612         dst_format = src_format;
6613     }
6614     else
6615     {
6616         dst_format = dst_surface->resource.format;
6617         if (src_surface)
6618         {
6619             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6620             {
6621                 src_surface = surface_convert_format(src_surface, dst_format->id);
6622                 if (!src_surface)
6623                 {
6624                     /* The conv function writes a FIXME */
6625                     WARN("Cannot convert source surface format to dest format.\n");
6626                     goto release;
6627                 }
6628             }
6629             wined3d_surface_map(src_surface, &src_map, NULL, WINED3DLOCK_READONLY);
6630             src_format = src_surface->resource.format;
6631         }
6632         else
6633         {
6634             src_format = dst_format;
6635         }
6636         if (dst_rect)
6637             wined3d_surface_map(dst_surface, &dst_map, &xdst, 0);
6638         else
6639             wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6640     }
6641
6642     bpp = dst_surface->resource.format->byte_count;
6643     srcheight = xsrc.bottom - xsrc.top;
6644     srcwidth = xsrc.right - xsrc.left;
6645     dstheight = xdst.bottom - xdst.top;
6646     dstwidth = xdst.right - xdst.left;
6647     width = (xdst.right - xdst.left) * bpp;
6648
6649     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6650     {
6651         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6652
6653         if (src_surface == dst_surface)
6654         {
6655             FIXME("Only plain blits supported on compressed surfaces.\n");
6656             hr = E_NOTIMPL;
6657             goto release;
6658         }
6659
6660         if (srcheight != dstheight || srcwidth != dstwidth)
6661         {
6662             WARN("Stretching not supported on compressed surfaces.\n");
6663             hr = WINED3DERR_INVALIDCALL;
6664             goto release;
6665         }
6666
6667         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6668         {
6669             WARN("Rectangle not block-aligned.\n");
6670             hr = WINED3DERR_INVALIDCALL;
6671             goto release;
6672         }
6673
6674         hr = surface_cpu_blt_compressed(src_map.data, dst_map.data,
6675                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6676                 src_format, flags, fx);
6677         goto release;
6678     }
6679
6680     if (dst_rect && src_surface != dst_surface)
6681         dbuf = dst_map.data;
6682     else
6683         dbuf = (BYTE *)dst_map.data + (xdst.top * dst_map.row_pitch) + (xdst.left * bpp);
6684
6685     /* First, all the 'source-less' blits */
6686     if (flags & WINEDDBLT_COLORFILL)
6687     {
6688         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6689         flags &= ~WINEDDBLT_COLORFILL;
6690     }
6691
6692     if (flags & WINEDDBLT_DEPTHFILL)
6693     {
6694         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6695     }
6696     if (flags & WINEDDBLT_ROP)
6697     {
6698         /* Catch some degenerate cases here. */
6699         switch (fx->dwROP)
6700         {
6701             case BLACKNESS:
6702                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6703                 break;
6704             case 0xAA0029: /* No-op */
6705                 break;
6706             case WHITENESS:
6707                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6708                 break;
6709             case SRCCOPY: /* Well, we do that below? */
6710                 break;
6711             default:
6712                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6713                 goto error;
6714         }
6715         flags &= ~WINEDDBLT_ROP;
6716     }
6717     if (flags & WINEDDBLT_DDROPS)
6718     {
6719         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6720     }
6721     /* Now the 'with source' blits. */
6722     if (src_surface)
6723     {
6724         const BYTE *sbase;
6725         int sx, xinc, sy, yinc;
6726
6727         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6728             goto release;
6729
6730         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6731                 && (srcwidth != dstwidth || srcheight != dstheight))
6732         {
6733             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6734             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6735         }
6736
6737         sbase = (BYTE *)src_map.data + (xsrc.top * src_map.row_pitch) + xsrc.left * bpp;
6738         xinc = (srcwidth << 16) / dstwidth;
6739         yinc = (srcheight << 16) / dstheight;
6740
6741         if (!flags)
6742         {
6743             /* No effects, we can cheat here. */
6744             if (dstwidth == srcwidth)
6745             {
6746                 if (dstheight == srcheight)
6747                 {
6748                     /* No stretching in either direction. This needs to be as
6749                      * fast as possible. */
6750                     sbuf = sbase;
6751
6752                     /* Check for overlapping surfaces. */
6753                     if (src_surface != dst_surface || xdst.top < xsrc.top
6754                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6755                     {
6756                         /* No overlap, or dst above src, so copy from top downwards. */
6757                         for (y = 0; y < dstheight; ++y)
6758                         {
6759                             memcpy(dbuf, sbuf, width);
6760                             sbuf += src_map.row_pitch;
6761                             dbuf += dst_map.row_pitch;
6762                         }
6763                     }
6764                     else if (xdst.top > xsrc.top)
6765                     {
6766                         /* Copy from bottom upwards. */
6767                         sbuf += src_map.row_pitch * dstheight;
6768                         dbuf += dst_map.row_pitch * dstheight;
6769                         for (y = 0; y < dstheight; ++y)
6770                         {
6771                             sbuf -= src_map.row_pitch;
6772                             dbuf -= dst_map.row_pitch;
6773                             memcpy(dbuf, sbuf, width);
6774                         }
6775                     }
6776                     else
6777                     {
6778                         /* Src and dst overlapping on the same line, use memmove. */
6779                         for (y = 0; y < dstheight; ++y)
6780                         {
6781                             memmove(dbuf, sbuf, width);
6782                             sbuf += src_map.row_pitch;
6783                             dbuf += dst_map.row_pitch;
6784                         }
6785                     }
6786                 }
6787                 else
6788                 {
6789                     /* Stretching in y direction only. */
6790                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6791                     {
6792                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6793                         memcpy(dbuf, sbuf, width);
6794                         dbuf += dst_map.row_pitch;
6795                     }
6796                 }
6797             }
6798             else
6799             {
6800                 /* Stretching in X direction. */
6801                 int last_sy = -1;
6802                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6803                 {
6804                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6805
6806                     if ((sy >> 16) == (last_sy >> 16))
6807                     {
6808                         /* This source row is the same as last source row -
6809                          * Copy the already stretched row. */
6810                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6811                     }
6812                     else
6813                     {
6814 #define STRETCH_ROW(type) \
6815 do { \
6816     const type *s = (const type *)sbuf; \
6817     type *d = (type *)dbuf; \
6818     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6819         d[x] = s[sx >> 16]; \
6820 } while(0)
6821
6822                         switch(bpp)
6823                         {
6824                             case 1:
6825                                 STRETCH_ROW(BYTE);
6826                                 break;
6827                             case 2:
6828                                 STRETCH_ROW(WORD);
6829                                 break;
6830                             case 4:
6831                                 STRETCH_ROW(DWORD);
6832                                 break;
6833                             case 3:
6834                             {
6835                                 const BYTE *s;
6836                                 BYTE *d = dbuf;
6837                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6838                                 {
6839                                     DWORD pixel;
6840
6841                                     s = sbuf + 3 * (sx >> 16);
6842                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6843                                     d[0] = (pixel      ) & 0xff;
6844                                     d[1] = (pixel >>  8) & 0xff;
6845                                     d[2] = (pixel >> 16) & 0xff;
6846                                     d += 3;
6847                                 }
6848                                 break;
6849                             }
6850                             default:
6851                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6852                                 hr = WINED3DERR_NOTAVAILABLE;
6853                                 goto error;
6854                         }
6855 #undef STRETCH_ROW
6856                     }
6857                     dbuf += dst_map.row_pitch;
6858                     last_sy = sy;
6859                 }
6860             }
6861         }
6862         else
6863         {
6864             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6865             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6866             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6867             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6868             {
6869                 /* The color keying flags are checked for correctness in ddraw */
6870                 if (flags & WINEDDBLT_KEYSRC)
6871                 {
6872                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6873                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6874                 }
6875                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6876                 {
6877                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6878                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6879                 }
6880
6881                 if (flags & WINEDDBLT_KEYDEST)
6882                 {
6883                     /* Destination color keys are taken from the source surface! */
6884                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6885                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6886                 }
6887                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6888                 {
6889                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6890                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6891                 }
6892
6893                 if (bpp == 1)
6894                 {
6895                     keymask = 0xff;
6896                 }
6897                 else
6898                 {
6899                     keymask = src_format->red_mask
6900                             | src_format->green_mask
6901                             | src_format->blue_mask;
6902                 }
6903                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6904             }
6905
6906             if (flags & WINEDDBLT_DDFX)
6907             {
6908                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6909                 LONG tmpxy;
6910                 dTopLeft     = dbuf;
6911                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6912                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6913                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6914
6915                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6916                 {
6917                     /* I don't think we need to do anything about this flag */
6918                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6919                 }
6920                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6921                 {
6922                     tmp          = dTopRight;
6923                     dTopRight    = dTopLeft;
6924                     dTopLeft     = tmp;
6925                     tmp          = dBottomRight;
6926                     dBottomRight = dBottomLeft;
6927                     dBottomLeft  = tmp;
6928                     dstxinc = dstxinc * -1;
6929                 }
6930                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6931                 {
6932                     tmp          = dTopLeft;
6933                     dTopLeft     = dBottomLeft;
6934                     dBottomLeft  = tmp;
6935                     tmp          = dTopRight;
6936                     dTopRight    = dBottomRight;
6937                     dBottomRight = tmp;
6938                     dstyinc = dstyinc * -1;
6939                 }
6940                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6941                 {
6942                     /* I don't think we need to do anything about this flag */
6943                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6944                 }
6945                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6946                 {
6947                     tmp          = dBottomRight;
6948                     dBottomRight = dTopLeft;
6949                     dTopLeft     = tmp;
6950                     tmp          = dBottomLeft;
6951                     dBottomLeft  = dTopRight;
6952                     dTopRight    = tmp;
6953                     dstxinc = dstxinc * -1;
6954                     dstyinc = dstyinc * -1;
6955                 }
6956                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6957                 {
6958                     tmp          = dTopLeft;
6959                     dTopLeft     = dBottomLeft;
6960                     dBottomLeft  = dBottomRight;
6961                     dBottomRight = dTopRight;
6962                     dTopRight    = tmp;
6963                     tmpxy   = dstxinc;
6964                     dstxinc = dstyinc;
6965                     dstyinc = tmpxy;
6966                     dstxinc = dstxinc * -1;
6967                 }
6968                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6969                 {
6970                     tmp          = dTopLeft;
6971                     dTopLeft     = dTopRight;
6972                     dTopRight    = dBottomRight;
6973                     dBottomRight = dBottomLeft;
6974                     dBottomLeft  = tmp;
6975                     tmpxy   = dstxinc;
6976                     dstxinc = dstyinc;
6977                     dstyinc = tmpxy;
6978                     dstyinc = dstyinc * -1;
6979                 }
6980                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6981                 {
6982                     /* I don't think we need to do anything about this flag */
6983                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6984                 }
6985                 dbuf = dTopLeft;
6986                 flags &= ~(WINEDDBLT_DDFX);
6987             }
6988
6989 #define COPY_COLORKEY_FX(type) \
6990 do { \
6991     const type *s; \
6992     type *d = (type *)dbuf, *dx, tmp; \
6993     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6994     { \
6995         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
6996         dx = d; \
6997         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6998         { \
6999             tmp = s[sx >> 16]; \
7000             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7001                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7002             { \
7003                 dx[0] = tmp; \
7004             } \
7005             dx = (type *)(((BYTE *)dx) + dstxinc); \
7006         } \
7007         d = (type *)(((BYTE *)d) + dstyinc); \
7008     } \
7009 } while(0)
7010
7011             switch (bpp)
7012             {
7013                 case 1:
7014                     COPY_COLORKEY_FX(BYTE);
7015                     break;
7016                 case 2:
7017                     COPY_COLORKEY_FX(WORD);
7018                     break;
7019                 case 4:
7020                     COPY_COLORKEY_FX(DWORD);
7021                     break;
7022                 case 3:
7023                 {
7024                     const BYTE *s;
7025                     BYTE *d = dbuf, *dx;
7026                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7027                     {
7028                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7029                         dx = d;
7030                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7031                         {
7032                             DWORD pixel, dpixel = 0;
7033                             s = sbuf + 3 * (sx>>16);
7034                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7035                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7036                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7037                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7038                             {
7039                                 dx[0] = (pixel      ) & 0xff;
7040                                 dx[1] = (pixel >>  8) & 0xff;
7041                                 dx[2] = (pixel >> 16) & 0xff;
7042                             }
7043                             dx += dstxinc;
7044                         }
7045                         d += dstyinc;
7046                     }
7047                     break;
7048                 }
7049                 default:
7050                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7051                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7052                     hr = WINED3DERR_NOTAVAILABLE;
7053                     goto error;
7054 #undef COPY_COLORKEY_FX
7055             }
7056         }
7057     }
7058
7059 error:
7060     if (flags && FIXME_ON(d3d_surface))
7061     {
7062         FIXME("\tUnsupported flags: %#x.\n", flags);
7063     }
7064
7065 release:
7066     wined3d_surface_unmap(dst_surface);
7067     if (src_surface && src_surface != dst_surface)
7068         wined3d_surface_unmap(src_surface);
7069     /* Release the converted surface, if any. */
7070     if (src_surface && src_surface != orig_src)
7071         wined3d_surface_decref(src_surface);
7072
7073     return hr;
7074 }
7075
7076 /* Do not call while under the GL lock. */
7077 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7078         const RECT *dst_rect, const struct wined3d_color *color)
7079 {
7080     static const RECT src_rect;
7081     WINEDDBLTFX BltFx;
7082
7083     memset(&BltFx, 0, sizeof(BltFx));
7084     BltFx.dwSize = sizeof(BltFx);
7085     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7086     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7087             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7088 }
7089
7090 /* Do not call while under the GL lock. */
7091 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7092         struct wined3d_surface *surface, const RECT *rect, float depth)
7093 {
7094     FIXME("Depth filling not implemented by cpu_blit.\n");
7095     return WINED3DERR_INVALIDCALL;
7096 }
7097
7098 const struct blit_shader cpu_blit =  {
7099     cpu_blit_alloc,
7100     cpu_blit_free,
7101     cpu_blit_set,
7102     cpu_blit_unset,
7103     cpu_blit_supported,
7104     cpu_blit_color_fill,
7105     cpu_blit_depth_fill,
7106 };
7107
7108 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7109         UINT width, UINT height, UINT level, WINED3DMULTISAMPLE_TYPE multisample_type,
7110         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7111         WINED3DPOOL pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7112 {
7113     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7114     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7115     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7116     unsigned int resource_size;
7117     HRESULT hr;
7118
7119     if (multisample_quality > 0)
7120     {
7121         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7122         multisample_quality = 0;
7123     }
7124
7125     /* Quick lockable sanity check.
7126      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7127      * this function is too deep to need to care about things like this.
7128      * Levels need to be checked too, since they all affect what can be done. */
7129     switch (pool)
7130     {
7131         case WINED3DPOOL_SCRATCH:
7132             if (!lockable)
7133             {
7134                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7135                         "which are mutually exclusive, setting lockable to TRUE.\n");
7136                 lockable = TRUE;
7137             }
7138             break;
7139
7140         case WINED3DPOOL_SYSTEMMEM:
7141             if (!lockable)
7142                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7143             break;
7144
7145         case WINED3DPOOL_MANAGED:
7146             if (usage & WINED3DUSAGE_DYNAMIC)
7147                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7148             break;
7149
7150         case WINED3DPOOL_DEFAULT:
7151             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7152                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7153             break;
7154
7155         default:
7156             FIXME("Unknown pool %#x.\n", pool);
7157             break;
7158     };
7159
7160     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7161         FIXME("Trying to create a render target that isn't in the default pool.\n");
7162
7163     /* FIXME: Check that the format is supported by the device. */
7164
7165     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7166     if (!resource_size)
7167         return WINED3DERR_INVALIDCALL;
7168
7169     surface->surface_type = surface_type;
7170
7171     switch (surface_type)
7172     {
7173         case SURFACE_OPENGL:
7174             surface->surface_ops = &surface_ops;
7175             break;
7176
7177         case SURFACE_GDI:
7178             surface->surface_ops = &gdi_surface_ops;
7179             break;
7180
7181         default:
7182             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7183             return WINED3DERR_INVALIDCALL;
7184     }
7185
7186     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7187             multisample_type, multisample_quality, usage, pool, width, height, 1,
7188             resource_size, parent, parent_ops, &surface_resource_ops);
7189     if (FAILED(hr))
7190     {
7191         WARN("Failed to initialize resource, returning %#x.\n", hr);
7192         return hr;
7193     }
7194
7195     /* "Standalone" surface. */
7196     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7197
7198     surface->texture_level = level;
7199     list_init(&surface->overlays);
7200
7201     /* Flags */
7202     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7203     if (flags & WINED3D_SURFACE_DISCARD)
7204         surface->flags |= SFLAG_DISCARD;
7205     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7206         surface->flags |= SFLAG_PIN_SYSMEM;
7207     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7208         surface->flags |= SFLAG_LOCKABLE;
7209     /* I'm not sure if this qualifies as a hack or as an optimization. It
7210      * seems reasonable to assume that lockable render targets will get
7211      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7212      * creation. However, the other reason we want to do this is that several
7213      * ddraw applications access surface memory while the surface isn't
7214      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7215      * future locks prevents these from crashing. */
7216     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7217         surface->flags |= SFLAG_DYNLOCK;
7218
7219     /* Mark the texture as dirty so that it gets loaded first time around. */
7220     surface_add_dirty_rect(surface, NULL);
7221     list_init(&surface->renderbuffers);
7222
7223     TRACE("surface %p, memory %p, size %u\n",
7224             surface, surface->resource.allocatedMemory, surface->resource.size);
7225
7226     /* Call the private setup routine */
7227     hr = surface->surface_ops->surface_private_setup(surface);
7228     if (FAILED(hr))
7229     {
7230         ERR("Private setup failed, returning %#x\n", hr);
7231         surface_cleanup(surface);
7232         return hr;
7233     }
7234
7235     /* Similar to lockable rendertargets above, creating the DIB section
7236      * during surface initialization prevents the sysmem pointer from changing
7237      * after a wined3d_surface_getdc() call. */
7238     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7239             && SUCCEEDED(surface_create_dib_section(surface)))
7240     {
7241         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7242         surface->resource.heapMemory = NULL;
7243         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7244     }
7245
7246     return hr;
7247 }
7248
7249 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7250         enum wined3d_format_id format_id, UINT level, DWORD usage, WINED3DPOOL pool,
7251         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7252         DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7253 {
7254     struct wined3d_surface *object;
7255     HRESULT hr;
7256
7257     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7258             device, width, height, debug_d3dformat(format_id), level);
7259     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7260             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7261     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7262
7263     if (surface_type == SURFACE_OPENGL && !device->adapter)
7264     {
7265         ERR("OpenGL surfaces are not available without OpenGL.\n");
7266         return WINED3DERR_NOTAVAILABLE;
7267     }
7268
7269     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7270     if (!object)
7271     {
7272         ERR("Failed to allocate surface memory.\n");
7273         return WINED3DERR_OUTOFVIDEOMEMORY;
7274     }
7275
7276     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7277             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7278     if (FAILED(hr))
7279     {
7280         WARN("Failed to initialize surface, returning %#x.\n", hr);
7281         HeapFree(GetProcessHeap(), 0, object);
7282         return hr;
7283     }
7284
7285     TRACE("Created surface %p.\n", object);
7286     *surface = object;
7287
7288     return hr;
7289 }