po: Update French translation.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
/* Forward declarations of the file-local blit helpers, so that they can be
 * referenced before their definitions. Both take a destination surface and
 * rectangle, a source surface and rectangle, blit flags, optional blit
 * effects and a texture filter, and return an HRESULT. */
static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
        const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
        enum wined3d_texture_filter_type filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
/* Describes how to sample a surface's texture when drawing it as a quad.
 * Filled in by surface_get_blt_info() and consumed by draw_textured_quad(). */
struct blt_info
{
    /* GL_TEXTURE_BINDING_* enum that corresponds to bind_target. */
    GLenum binding;
    /* Texture target to enable and bind the texture to. */
    GLenum bind_target;
    /* tex_2d, tex_rect or tex_cube, matching bind_target. */
    enum tex_types tex_type;
    /* One 3-component texture coordinate per quad vertex, in the order
     * top-left, top-right, bottom-left, bottom-right. */
    GLfloat coords[4][3];
};
171
/* A rectangle with floating point edges; cube_coords_float() stores the
 * left, top, right and bottom edges of a RECT here after rescaling them. */
struct float_rect
{
    float l;
    float t;
    float r;
    float b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380
381     TRACE("surface %p.\n", surface);
382
383     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
384     {
385         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
386         return WINED3DERR_INVALIDCALL;
387     }
388
389     switch (format->byte_count)
390     {
391         case 2:
392         case 4:
393             /* Allocate extra space to store the RGB bit masks. */
394             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
395             break;
396
397         case 3:
398             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
399             break;
400
401         default:
402             /* Allocate extra space for a palette. */
403             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
404                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
405             break;
406     }
407
408     if (!b_info)
409         return E_OUTOFMEMORY;
410
411     /* Some applications access the surface in via DWORDs, and do not take
412      * the necessary care at the end of the surface. So we need at least
413      * 4 extra bytes at the end of the surface. Check against the page size,
414      * if the last page used for the surface has at least 4 spare bytes we're
415      * safe, otherwise add an extra line to the DIB section. */
416     GetSystemInfo(&sysInfo);
417     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
418     {
419         extraline = 1;
420         TRACE("Adding an extra line to the DIB section.\n");
421     }
422
423     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
424     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
425     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
426     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
427     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
428             * wined3d_surface_get_pitch(surface);
429     b_info->bmiHeader.biPlanes = 1;
430     b_info->bmiHeader.biBitCount = format->byte_count * 8;
431
432     b_info->bmiHeader.biXPelsPerMeter = 0;
433     b_info->bmiHeader.biYPelsPerMeter = 0;
434     b_info->bmiHeader.biClrUsed = 0;
435     b_info->bmiHeader.biClrImportant = 0;
436
437     /* Get the bit masks */
438     masks = (DWORD *)b_info->bmiColors;
439     switch (surface->resource.format->id)
440     {
441         case WINED3DFMT_B8G8R8_UNORM:
442             b_info->bmiHeader.biCompression = BI_RGB;
443             break;
444
445         case WINED3DFMT_B5G5R5X1_UNORM:
446         case WINED3DFMT_B5G5R5A1_UNORM:
447         case WINED3DFMT_B4G4R4A4_UNORM:
448         case WINED3DFMT_B4G4R4X4_UNORM:
449         case WINED3DFMT_B2G3R3_UNORM:
450         case WINED3DFMT_B2G3R3A8_UNORM:
451         case WINED3DFMT_R10G10B10A2_UNORM:
452         case WINED3DFMT_R8G8B8A8_UNORM:
453         case WINED3DFMT_R8G8B8X8_UNORM:
454         case WINED3DFMT_B10G10R10A2_UNORM:
455         case WINED3DFMT_B5G6R5_UNORM:
456         case WINED3DFMT_R16G16B16A16_UNORM:
457             b_info->bmiHeader.biCompression = BI_BITFIELDS;
458             masks[0] = format->red_mask;
459             masks[1] = format->green_mask;
460             masks[2] = format->blue_mask;
461             break;
462
463         default:
464             /* Don't know palette */
465             b_info->bmiHeader.biCompression = BI_RGB;
466             break;
467     }
468
469     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
470             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
471             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
472     surface->dib.DIBsection = CreateDIBSection(0, b_info, DIB_RGB_COLORS, &surface->dib.bitmap_data, 0, 0);
473
474     if (!surface->dib.DIBsection)
475     {
476         ERR("Failed to create DIB section.\n");
477         HeapFree(GetProcessHeap(), 0, b_info);
478         return HRESULT_FROM_WIN32(GetLastError());
479     }
480
481     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
482     /* Copy the existing surface to the dib section. */
483     if (surface->resource.allocatedMemory)
484     {
485         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
486                 surface->resource.height * wined3d_surface_get_pitch(surface));
487     }
488     else
489     {
490         /* This is to make maps read the GL texture although memory is allocated. */
491         surface->flags &= ~SFLAG_INSYSMEM;
492     }
493     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
494
495     HeapFree(GetProcessHeap(), 0, b_info);
496
497     /* Now allocate a DC. */
498     surface->hDC = CreateCompatibleDC(0);
499     SelectObject(surface->hDC, surface->dib.DIBsection);
500     TRACE("Using wined3d palette %p.\n", surface->palette);
501     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
502
503     surface->flags |= SFLAG_DIBSECTION;
504
505     return WINED3D_OK;
506 }
507
508 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
509 {
510     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
511         return FALSE;
512     if (!(surface->flags & SFLAG_DYNLOCK))
513         return FALSE;
514     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
515         return FALSE;
516     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
517         return FALSE;
518
519     return TRUE;
520 }
521
522 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
523 {
524     struct wined3d_context *context;
525     GLenum error;
526
527     context = context_acquire(surface->resource.device, NULL);
528     ENTER_GL();
529
530     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
531     error = glGetError();
532     if (!surface->pbo || error != GL_NO_ERROR)
533         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
534
535     TRACE("Binding PBO %u.\n", surface->pbo);
536
537     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
538     checkGLcall("glBindBufferARB");
539
540     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
541             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
542     checkGLcall("glBufferDataARB");
543
544     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
545     checkGLcall("glBindBufferARB");
546
547     /* We don't need the system memory anymore and we can't even use it for PBOs. */
548     if (!(surface->flags & SFLAG_CLIENT))
549     {
550         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
551         surface->resource.heapMemory = NULL;
552     }
553     surface->resource.allocatedMemory = NULL;
554     surface->flags |= SFLAG_PBO;
555     LEAVE_GL();
556     context_release(context);
557 }
558
559 static void surface_prepare_system_memory(struct wined3d_surface *surface)
560 {
561     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
562
563     TRACE("surface %p.\n", surface);
564
565     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
566         surface_load_pbo(surface, gl_info);
567     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
568     {
569         /* Whatever surface we have, make sure that there is memory allocated
570          * for the downloaded copy, or a PBO to map. */
571         if (!surface->resource.heapMemory)
572             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
573
574         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
575                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
576
577         if (surface->flags & SFLAG_INSYSMEM)
578             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
579     }
580 }
581
582 static void surface_evict_sysmem(struct wined3d_surface *surface)
583 {
584     if (surface->resource.map_count || (surface->flags & SFLAG_DONOTFREE))
585         return;
586
587     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
588     surface->resource.allocatedMemory = NULL;
589     surface->resource.heapMemory = NULL;
590     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
591 }
592
593 /* Context activation is done by the caller. */
594 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
595         struct wined3d_context *context, BOOL srgb)
596 {
597     struct wined3d_device *device = surface->resource.device;
598     DWORD active_sampler;
599
600     /* We don't need a specific texture unit, but after binding the texture
601      * the current unit is dirty. Read the unit back instead of switching to
602      * 0, this avoids messing around with the state manager's GL states. The
603      * current texture unit should always be a valid one.
604      *
605      * To be more specific, this is tricky because we can implicitly be
606      * called from sampler() in state.c. This means we can't touch anything
607      * other than whatever happens to be the currently active texture, or we
608      * would risk marking already applied sampler states dirty again. */
609     active_sampler = device->rev_tex_unit_map[context->active_texture];
610
611     if (active_sampler != WINED3D_UNMAPPED_STAGE)
612         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
613     surface_bind(surface, context, srgb);
614 }
615
616 static void surface_force_reload(struct wined3d_surface *surface)
617 {
618     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
619 }
620
621 static void surface_release_client_storage(struct wined3d_surface *surface)
622 {
623     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
624
625     ENTER_GL();
626     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
627     if (surface->texture_name)
628     {
629         surface_bind_and_dirtify(surface, context, FALSE);
630         glTexImage2D(surface->texture_target, surface->texture_level,
631                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
632     }
633     if (surface->texture_name_srgb)
634     {
635         surface_bind_and_dirtify(surface, context, TRUE);
636         glTexImage2D(surface->texture_target, surface->texture_level,
637                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
638     }
639     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
640     LEAVE_GL();
641
642     context_release(context);
643
644     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
645     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
646     surface_force_reload(surface);
647 }
648
649 static HRESULT surface_private_setup(struct wined3d_surface *surface)
650 {
651     /* TODO: Check against the maximum texture sizes supported by the video card. */
652     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
653     unsigned int pow2Width, pow2Height;
654
655     TRACE("surface %p.\n", surface);
656
657     surface->texture_name = 0;
658     surface->texture_target = GL_TEXTURE_2D;
659
660     /* Non-power2 support */
661     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
662     {
663         pow2Width = surface->resource.width;
664         pow2Height = surface->resource.height;
665     }
666     else
667     {
668         /* Find the nearest pow2 match */
669         pow2Width = pow2Height = 1;
670         while (pow2Width < surface->resource.width)
671             pow2Width <<= 1;
672         while (pow2Height < surface->resource.height)
673             pow2Height <<= 1;
674     }
675     surface->pow2Width = pow2Width;
676     surface->pow2Height = pow2Height;
677
678     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
679     {
680         /* TODO: Add support for non power two compressed textures. */
681         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
682         {
683             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
684                   surface, surface->resource.width, surface->resource.height);
685             return WINED3DERR_NOTAVAILABLE;
686         }
687     }
688
689     if (pow2Width != surface->resource.width
690             || pow2Height != surface->resource.height)
691     {
692         surface->flags |= SFLAG_NONPOW2;
693     }
694
695     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
696             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
697     {
698         /* One of three options:
699          * 1: Do the same as we do with NPOT and scale the texture, (any
700          *    texture ops would require the texture to be scaled which is
701          *    potentially slow)
702          * 2: Set the texture to the maximum size (bad idea).
703          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
704          * 4: Create the surface, but allow it to be used only for DirectDraw
705          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
706          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
707          *    the render target. */
708         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
709         {
710             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
711             return WINED3DERR_NOTAVAILABLE;
712         }
713
714         /* We should never use this surface in combination with OpenGL! */
715         TRACE("Creating an oversized surface: %ux%u.\n",
716                 surface->pow2Width, surface->pow2Height);
717     }
718     else
719     {
720         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
721          * and EXT_PALETTED_TEXTURE is used in combination with texture
722          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
723          * EXT_PALETTED_TEXTURE doesn't work in combination with
724          * ARB_TEXTURE_RECTANGLE. */
725         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
726                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
727                 && gl_info->supported[EXT_PALETTED_TEXTURE]
728                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
729         {
730             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
731             surface->pow2Width = surface->resource.width;
732             surface->pow2Height = surface->resource.height;
733             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
734         }
735     }
736
737     switch (wined3d_settings.offscreen_rendering_mode)
738     {
739         case ORM_FBO:
740             surface->get_drawable_size = get_drawable_size_fbo;
741             break;
742
743         case ORM_BACKBUFFER:
744             surface->get_drawable_size = get_drawable_size_backbuffer;
745             break;
746
747         default:
748             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
749             return WINED3DERR_INVALIDCALL;
750     }
751
752     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
753         surface->flags |= SFLAG_DISCARDED;
754
755     return WINED3D_OK;
756 }
757
758 static void surface_realize_palette(struct wined3d_surface *surface)
759 {
760     struct wined3d_palette *palette = surface->palette;
761
762     TRACE("surface %p.\n", surface);
763
764     if (!palette) return;
765
766     if (surface->resource.format->id == WINED3DFMT_P8_UINT
767             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
768     {
769         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
770         {
771             /* Make sure the texture is up to date. This call doesn't do
772              * anything if the texture is already up to date. */
773             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
774
775             /* We want to force a palette refresh, so mark the drawable as not being up to date */
776             if (!surface_is_offscreen(surface))
777                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
778         }
779         else
780         {
781             if (!(surface->flags & SFLAG_INSYSMEM))
782             {
783                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
784                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
785             }
786             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
787         }
788     }
789
790     if (surface->flags & SFLAG_DIBSECTION)
791     {
792         RGBQUAD col[256];
793         unsigned int i;
794
795         TRACE("Updating the DC's palette.\n");
796
797         for (i = 0; i < 256; ++i)
798         {
799             col[i].rgbRed   = palette->palents[i].peRed;
800             col[i].rgbGreen = palette->palents[i].peGreen;
801             col[i].rgbBlue  = palette->palents[i].peBlue;
802             col[i].rgbReserved = 0;
803         }
804         SetDIBColorTable(surface->hDC, 0, 256, col);
805     }
806
807     /* Propagate the changes to the drawable when we have a palette. */
808     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
809         surface_load_location(surface, surface->draw_binding, NULL);
810 }
811
812 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
813 {
814     HRESULT hr;
815
816     /* If there's no destination surface there is nothing to do. */
817     if (!surface->overlay_dest)
818         return WINED3D_OK;
819
820     /* Blt calls ModifyLocation on the dest surface, which in turn calls
821      * DrawOverlay to update the overlay. Prevent an endless recursion. */
822     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
823         return WINED3D_OK;
824
825     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
826     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
827             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
828     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
829
830     return hr;
831 }
832
833 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
834 {
835     struct wined3d_device *device = surface->resource.device;
836     const RECT *pass_rect = rect;
837
838     TRACE("surface %p, rect %s, flags %#x.\n",
839             surface, wine_dbgstr_rect(rect), flags);
840
841     if (flags & WINED3DLOCK_DISCARD)
842     {
843         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
844         surface_prepare_system_memory(surface);
845         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
846     }
847     else
848     {
849         /* surface_load_location() does not check if the rectangle specifies
850          * the full surface. Most callers don't need that, so do it here. */
851         if (rect && !rect->top && !rect->left
852                 && rect->right == surface->resource.width
853                 && rect->bottom == surface->resource.height)
854             pass_rect = NULL;
855         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
856     }
857
858     if (surface->flags & SFLAG_PBO)
859     {
860         const struct wined3d_gl_info *gl_info;
861         struct wined3d_context *context;
862
863         context = context_acquire(device, NULL);
864         gl_info = context->gl_info;
865
866         ENTER_GL();
867         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
868         checkGLcall("glBindBufferARB");
869
870         /* This shouldn't happen but could occur if some other function
871          * didn't handle the PBO properly. */
872         if (surface->resource.allocatedMemory)
873             ERR("The surface already has PBO memory allocated.\n");
874
875         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
876         checkGLcall("glMapBufferARB");
877
878         /* Make sure the PBO isn't set anymore in order not to break non-PBO
879          * calls. */
880         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
881         checkGLcall("glBindBufferARB");
882
883         LEAVE_GL();
884         context_release(context);
885     }
886
887     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
888     {
889         if (!rect)
890             surface_add_dirty_rect(surface, NULL);
891         else
892         {
893             struct wined3d_box b;
894
895             b.left = rect->left;
896             b.top = rect->top;
897             b.right = rect->right;
898             b.bottom = rect->bottom;
899             b.front = 0;
900             b.back = 1;
901             surface_add_dirty_rect(surface, &b);
902         }
903     }
904 }
905
906 static void surface_unmap(struct wined3d_surface *surface)
907 {
908     struct wined3d_device *device = surface->resource.device;
909     BOOL fullsurface;
910
911     TRACE("surface %p.\n", surface);
912
913     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
914
915     if (surface->flags & SFLAG_PBO)
916     {
917         const struct wined3d_gl_info *gl_info;
918         struct wined3d_context *context;
919
920         TRACE("Freeing PBO memory.\n");
921
922         context = context_acquire(device, NULL);
923         gl_info = context->gl_info;
924
925         ENTER_GL();
926         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
927         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
928         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
929         checkGLcall("glUnmapBufferARB");
930         LEAVE_GL();
931         context_release(context);
932
933         surface->resource.allocatedMemory = NULL;
934     }
935
936     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
937
938     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
939     {
940         TRACE("Not dirtified, nothing to do.\n");
941         goto done;
942     }
943
944     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
945             && surface->container.u.swapchain->front_buffer == surface)
946     {
947         if (!surface->dirtyRect.left && !surface->dirtyRect.top
948                 && surface->dirtyRect.right == surface->resource.width
949                 && surface->dirtyRect.bottom == surface->resource.height)
950         {
951             fullsurface = TRUE;
952         }
953         else
954         {
955             /* TODO: Proper partial rectangle tracking. */
956             fullsurface = FALSE;
957             surface->flags |= SFLAG_INSYSMEM;
958         }
959
960         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
961
962         /* Partial rectangle tracking is not commonly implemented, it is only
963          * done for render targets. INSYSMEM was set before to tell
964          * surface_load_location() where to read the rectangle from.
965          * Indrawable is set because all modifications from the partial
966          * sysmem copy are written back to the drawable, thus the surface is
967          * merged again in the drawable. The sysmem copy is not fully up to
968          * date because only a subrectangle was read in Map(). */
969         if (!fullsurface)
970         {
971             surface_modify_location(surface, surface->draw_binding, TRUE);
972             surface_evict_sysmem(surface);
973         }
974
975         surface->dirtyRect.left = surface->resource.width;
976         surface->dirtyRect.top = surface->resource.height;
977         surface->dirtyRect.right = 0;
978         surface->dirtyRect.bottom = 0;
979     }
980     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
981     {
982         FIXME("Depth / stencil buffer locking is not implemented.\n");
983     }
984
985 done:
986     /* Overlays have to be redrawn manually after changes with the GL implementation */
987     if (surface->overlay_dest)
988         surface_draw_overlay(surface);
989 }
990
991 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
992 {
993     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
994         return FALSE;
995     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
996         return FALSE;
997     return TRUE;
998 }
999
1000 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1001         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1002 {
1003     const struct wined3d_gl_info *gl_info;
1004     struct wined3d_context *context;
1005     DWORD src_mask, dst_mask;
1006     GLbitfield gl_mask;
1007
1008     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1009             device, src_surface, wine_dbgstr_rect(src_rect),
1010             dst_surface, wine_dbgstr_rect(dst_rect));
1011
1012     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1013     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1014
1015     if (src_mask != dst_mask)
1016     {
1017         ERR("Incompatible formats %s and %s.\n",
1018                 debug_d3dformat(src_surface->resource.format->id),
1019                 debug_d3dformat(dst_surface->resource.format->id));
1020         return;
1021     }
1022
1023     if (!src_mask)
1024     {
1025         ERR("Not a depth / stencil format: %s.\n",
1026                 debug_d3dformat(src_surface->resource.format->id));
1027         return;
1028     }
1029
1030     gl_mask = 0;
1031     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1032         gl_mask |= GL_DEPTH_BUFFER_BIT;
1033     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1034         gl_mask |= GL_STENCIL_BUFFER_BIT;
1035
1036     /* Make sure the locations are up-to-date. Loading the destination
1037      * surface isn't required if the entire surface is overwritten. */
1038     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1039     if (!surface_is_full_rect(dst_surface, dst_rect))
1040         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1041
1042     context = context_acquire(device, NULL);
1043     if (!context->valid)
1044     {
1045         context_release(context);
1046         WARN("Invalid context, skipping blit.\n");
1047         return;
1048     }
1049
1050     gl_info = context->gl_info;
1051
1052     ENTER_GL();
1053
1054     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1055     glReadBuffer(GL_NONE);
1056     checkGLcall("glReadBuffer()");
1057     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1058
1059     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1060     context_set_draw_buffer(context, GL_NONE);
1061     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1062     context_invalidate_state(context, STATE_FRAMEBUFFER);
1063
1064     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1065     {
1066         glDepthMask(GL_TRUE);
1067         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1068     }
1069     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1070     {
1071         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1072         {
1073             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1074             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1075         }
1076         glStencilMask(~0U);
1077         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1078     }
1079
1080     glDisable(GL_SCISSOR_TEST);
1081     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1082
1083     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1084             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1085     checkGLcall("glBlitFramebuffer()");
1086
1087     LEAVE_GL();
1088
1089     if (wined3d_settings.strict_draw_ordering)
1090         wglFlush(); /* Flush to ensure ordering across contexts. */
1091
1092     context_release(context);
1093 }
1094
1095 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1096  * Depth / stencil is not supported. */
1097 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1098         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1099         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1100 {
1101     const struct wined3d_gl_info *gl_info;
1102     struct wined3d_context *context;
1103     RECT src_rect, dst_rect;
1104     GLenum gl_filter;
1105     GLenum buffer;
1106
1107     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1108     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1109             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1110     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1111             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1112
1113     src_rect = *src_rect_in;
1114     dst_rect = *dst_rect_in;
1115
1116     switch (filter)
1117     {
1118         case WINED3D_TEXF_LINEAR:
1119             gl_filter = GL_LINEAR;
1120             break;
1121
1122         default:
1123             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1124         case WINED3D_TEXF_NONE:
1125         case WINED3D_TEXF_POINT:
1126             gl_filter = GL_NEAREST;
1127             break;
1128     }
1129
1130     /* Resolve the source surface first if needed. */
1131     if (src_location == SFLAG_INRB_MULTISAMPLE
1132             && (src_surface->resource.format->id != dst_surface->resource.format->id
1133                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1134                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1135         src_location = SFLAG_INRB_RESOLVED;
1136
1137     /* Make sure the locations are up-to-date. Loading the destination
1138      * surface isn't required if the entire surface is overwritten. (And is
1139      * in fact harmful if we're being called by surface_load_location() with
1140      * the purpose of loading the destination surface.) */
1141     surface_load_location(src_surface, src_location, NULL);
1142     if (!surface_is_full_rect(dst_surface, &dst_rect))
1143         surface_load_location(dst_surface, dst_location, NULL);
1144
1145     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1146     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1147     else context = context_acquire(device, NULL);
1148
1149     if (!context->valid)
1150     {
1151         context_release(context);
1152         WARN("Invalid context, skipping blit.\n");
1153         return;
1154     }
1155
1156     gl_info = context->gl_info;
1157
1158     if (src_location == SFLAG_INDRAWABLE)
1159     {
1160         TRACE("Source surface %p is onscreen.\n", src_surface);
1161         buffer = surface_get_gl_buffer(src_surface);
1162         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1163     }
1164     else
1165     {
1166         TRACE("Source surface %p is offscreen.\n", src_surface);
1167         buffer = GL_COLOR_ATTACHMENT0;
1168     }
1169
1170     ENTER_GL();
1171     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1172     glReadBuffer(buffer);
1173     checkGLcall("glReadBuffer()");
1174     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1175     LEAVE_GL();
1176
1177     if (dst_location == SFLAG_INDRAWABLE)
1178     {
1179         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1180         buffer = surface_get_gl_buffer(dst_surface);
1181         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1182     }
1183     else
1184     {
1185         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1186         buffer = GL_COLOR_ATTACHMENT0;
1187     }
1188
1189     ENTER_GL();
1190     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1191     context_set_draw_buffer(context, buffer);
1192     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1193     context_invalidate_state(context, STATE_FRAMEBUFFER);
1194
1195     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1196     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1197     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1198     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1199     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1200
1201     glDisable(GL_SCISSOR_TEST);
1202     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1203
1204     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1205             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1206     checkGLcall("glBlitFramebuffer()");
1207
1208     LEAVE_GL();
1209
1210     if (wined3d_settings.strict_draw_ordering
1211             || (dst_location == SFLAG_INDRAWABLE
1212             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1213         wglFlush();
1214
1215     context_release(context);
1216 }
1217
1218 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1219         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1220         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1221 {
1222     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1223         return FALSE;
1224
1225     /* Source and/or destination need to be on the GL side */
1226     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1227         return FALSE;
1228
1229     switch (blit_op)
1230     {
1231         case WINED3D_BLIT_OP_COLOR_BLIT:
1232             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1233                 return FALSE;
1234             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1235                 return FALSE;
1236             break;
1237
1238         case WINED3D_BLIT_OP_DEPTH_BLIT:
1239             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1240                 return FALSE;
1241             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1242                 return FALSE;
1243             break;
1244
1245         default:
1246             return FALSE;
1247     }
1248
1249     if (!(src_format->id == dst_format->id
1250             || (is_identity_fixup(src_format->color_fixup)
1251             && is_identity_fixup(dst_format->color_fixup))))
1252         return FALSE;
1253
1254     return TRUE;
1255 }
1256
1257 /* This function checks if the primary render target uses the 8bit paletted format. */
1258 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1259 {
1260     if (device->fb.render_targets && device->fb.render_targets[0])
1261     {
1262         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1263         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1264                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1265             return TRUE;
1266     }
1267     return FALSE;
1268 }
1269
1270 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1271         DWORD color, struct wined3d_color *float_color)
1272 {
1273     const struct wined3d_format *format = surface->resource.format;
1274     const struct wined3d_device *device = surface->resource.device;
1275
1276     switch (format->id)
1277     {
1278         case WINED3DFMT_P8_UINT:
1279             if (surface->palette)
1280             {
1281                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1282                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1283                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1284             }
1285             else
1286             {
1287                 float_color->r = 0.0f;
1288                 float_color->g = 0.0f;
1289                 float_color->b = 0.0f;
1290             }
1291             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1292             break;
1293
1294         case WINED3DFMT_B5G6R5_UNORM:
1295             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1296             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1297             float_color->b = (color & 0x1f) / 31.0f;
1298             float_color->a = 1.0f;
1299             break;
1300
1301         case WINED3DFMT_B8G8R8_UNORM:
1302         case WINED3DFMT_B8G8R8X8_UNORM:
1303             float_color->r = D3DCOLOR_R(color);
1304             float_color->g = D3DCOLOR_G(color);
1305             float_color->b = D3DCOLOR_B(color);
1306             float_color->a = 1.0f;
1307             break;
1308
1309         case WINED3DFMT_B8G8R8A8_UNORM:
1310             float_color->r = D3DCOLOR_R(color);
1311             float_color->g = D3DCOLOR_G(color);
1312             float_color->b = D3DCOLOR_B(color);
1313             float_color->a = D3DCOLOR_A(color);
1314             break;
1315
1316         default:
1317             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1318             return FALSE;
1319     }
1320
1321     return TRUE;
1322 }
1323
1324 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1325 {
1326     const struct wined3d_format *format = surface->resource.format;
1327
1328     switch (format->id)
1329     {
1330         case WINED3DFMT_S1_UINT_D15_UNORM:
1331             *float_depth = depth / (float)0x00007fff;
1332             break;
1333
1334         case WINED3DFMT_D16_UNORM:
1335             *float_depth = depth / (float)0x0000ffff;
1336             break;
1337
1338         case WINED3DFMT_D24_UNORM_S8_UINT:
1339         case WINED3DFMT_X8D24_UNORM:
1340             *float_depth = depth / (float)0x00ffffff;
1341             break;
1342
1343         case WINED3DFMT_D32_UNORM:
1344             *float_depth = depth / (float)0xffffffff;
1345             break;
1346
1347         default:
1348             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1349             return FALSE;
1350     }
1351
1352     return TRUE;
1353 }
1354
1355 /* Do not call while under the GL lock. */
1356 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1357 {
1358     const struct wined3d_resource *resource = &surface->resource;
1359     struct wined3d_device *device = resource->device;
1360     const struct blit_shader *blitter;
1361
1362     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1363             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1364     if (!blitter)
1365     {
1366         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1367         return WINED3DERR_INVALIDCALL;
1368     }
1369
1370     return blitter->depth_fill(device, surface, rect, depth);
1371 }
1372
1373 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1374         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1375 {
1376     struct wined3d_device *device = src_surface->resource.device;
1377
1378     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1379             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1380             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1381         return WINED3DERR_INVALIDCALL;
1382
1383     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1384
1385     surface_modify_ds_location(dst_surface, SFLAG_INTEXTURE,
1386             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1387
1388     return WINED3D_OK;
1389 }
1390
1391 /* Do not call while under the GL lock. */
1392 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1393         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1394         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1395 {
1396     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1397     struct wined3d_device *device = dst_surface->resource.device;
1398     DWORD src_ds_flags, dst_ds_flags;
1399     RECT src_rect, dst_rect;
1400     BOOL scale, convert;
1401
1402     static const DWORD simple_blit = WINEDDBLT_ASYNC
1403             | WINEDDBLT_COLORFILL
1404             | WINEDDBLT_WAIT
1405             | WINEDDBLT_DEPTHFILL
1406             | WINEDDBLT_DONOTWAIT;
1407
1408     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1409             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1410             flags, fx, debug_d3dtexturefiltertype(filter));
1411     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1412
1413     if (fx)
1414     {
1415         TRACE("dwSize %#x.\n", fx->dwSize);
1416         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1417         TRACE("dwROP %#x.\n", fx->dwROP);
1418         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1419         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1420         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1421         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1422         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1423         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1424         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1425         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1426         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1427         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1428         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1429         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1430         TRACE("dwReserved %#x.\n", fx->dwReserved);
1431         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1432         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1433         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1434         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1435         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1436         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1437                 fx->ddckDestColorkey.color_space_low_value,
1438                 fx->ddckDestColorkey.color_space_high_value);
1439         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1440                 fx->ddckSrcColorkey.color_space_low_value,
1441                 fx->ddckSrcColorkey.color_space_high_value);
1442     }
1443
1444     if (dst_surface->resource.map_count || (src_surface && src_surface->resource.map_count))
1445     {
1446         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1447         return WINEDDERR_SURFACEBUSY;
1448     }
1449
1450     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1451
1452     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1453             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1454             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1455             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1456             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1457     {
1458         WARN("The application gave us a bad destination rectangle.\n");
1459         return WINEDDERR_INVALIDRECT;
1460     }
1461
1462     if (src_surface)
1463     {
1464         surface_get_rect(src_surface, src_rect_in, &src_rect);
1465
1466         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1467                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1468                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1469                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1470                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1471         {
1472             WARN("Application gave us bad source rectangle for Blt.\n");
1473             return WINEDDERR_INVALIDRECT;
1474         }
1475     }
1476     else
1477     {
1478         memset(&src_rect, 0, sizeof(src_rect));
1479     }
1480
1481     if (!fx || !(fx->dwDDFX))
1482         flags &= ~WINEDDBLT_DDFX;
1483
1484     if (flags & WINEDDBLT_WAIT)
1485         flags &= ~WINEDDBLT_WAIT;
1486
1487     if (flags & WINEDDBLT_ASYNC)
1488     {
1489         static unsigned int once;
1490
1491         if (!once++)
1492             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1493         flags &= ~WINEDDBLT_ASYNC;
1494     }
1495
1496     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1497     if (flags & WINEDDBLT_DONOTWAIT)
1498     {
1499         static unsigned int once;
1500
1501         if (!once++)
1502             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1503         flags &= ~WINEDDBLT_DONOTWAIT;
1504     }
1505
1506     if (!device->d3d_initialized)
1507     {
1508         WARN("D3D not initialized, using fallback.\n");
1509         goto cpu;
1510     }
1511
1512     /* We want to avoid invalidating the sysmem location for converted
1513      * surfaces, since otherwise we'd have to convert the data back when
1514      * locking them. */
1515     if (dst_surface->flags & SFLAG_CONVERTED)
1516     {
1517         WARN("Converted surface, using CPU blit.\n");
1518         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1519     }
1520
1521     if (flags & ~simple_blit)
1522     {
1523         WARN("Using fallback for complex blit (%#x).\n", flags);
1524         goto fallback;
1525     }
1526
1527     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1528         src_swapchain = src_surface->container.u.swapchain;
1529     else
1530         src_swapchain = NULL;
1531
1532     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1533         dst_swapchain = dst_surface->container.u.swapchain;
1534     else
1535         dst_swapchain = NULL;
1536
1537     /* This isn't strictly needed. FBO blits for example could deal with
1538      * cross-swapchain blits by first downloading the source to a texture
1539      * before switching to the destination context. We just have this here to
1540      * not have to deal with the issue, since cross-swapchain blits should be
1541      * rare. */
1542     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1543     {
1544         FIXME("Using fallback for cross-swapchain blit.\n");
1545         goto fallback;
1546     }
1547
1548     scale = src_surface
1549             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1550             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1551     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1552
1553     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1554     if (src_surface)
1555         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1556     else
1557         src_ds_flags = 0;
1558
1559     if (src_ds_flags || dst_ds_flags)
1560     {
1561         if (flags & WINEDDBLT_DEPTHFILL)
1562         {
1563             float depth;
1564
1565             TRACE("Depth fill.\n");
1566
1567             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1568                 return WINED3DERR_INVALIDCALL;
1569
1570             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1571                 return WINED3D_OK;
1572         }
1573         else
1574         {
1575             if (src_ds_flags != dst_ds_flags)
1576             {
1577                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1578                 return WINED3DERR_INVALIDCALL;
1579             }
1580
1581             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1582                 return WINED3D_OK;
1583         }
1584     }
1585     else
1586     {
1587         /* In principle this would apply to depth blits as well, but we don't
1588          * implement those in the CPU blitter at the moment. */
1589         if ((dst_surface->flags & SFLAG_INSYSMEM)
1590                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1591         {
1592             if (scale)
1593                 TRACE("Not doing sysmem blit because of scaling.\n");
1594             else if (convert)
1595                 TRACE("Not doing sysmem blit because of format conversion.\n");
1596             else
1597                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1598         }
1599
1600         if (flags & WINEDDBLT_COLORFILL)
1601         {
1602             struct wined3d_color color;
1603
1604             TRACE("Color fill.\n");
1605
1606             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1607                 goto fallback;
1608
1609             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1610                 return WINED3D_OK;
1611         }
1612         else
1613         {
1614             TRACE("Color blit.\n");
1615
1616             /* Upload */
1617             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1618             {
1619                 if (scale)
1620                     TRACE("Not doing upload because of scaling.\n");
1621                 else if (convert)
1622                     TRACE("Not doing upload because of format conversion.\n");
1623                 else
1624                 {
1625                     POINT dst_point = {dst_rect.left, dst_rect.top};
1626
1627                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1628                     {
1629                         if (!surface_is_offscreen(dst_surface))
1630                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1631                         return WINED3D_OK;
1632                     }
1633                 }
1634             }
1635
1636             /* Use present for back -> front blits. The idea behind this is
1637              * that present is potentially faster than a blit, in particular
1638              * when FBO blits aren't available. Some ddraw applications like
1639              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1640              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1641              * applications can't blit directly to the frontbuffer. */
1642             if (dst_swapchain && dst_swapchain->back_buffers
1643                     && dst_surface == dst_swapchain->front_buffer
1644                     && src_surface == dst_swapchain->back_buffers[0])
1645             {
1646                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1647
1648                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1649
1650                 /* Set the swap effect to COPY, we don't want the backbuffer
1651                  * to become undefined. */
1652                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1653                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1654                 dst_swapchain->desc.swap_effect = swap_effect;
1655
1656                 return WINED3D_OK;
1657             }
1658
1659             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1660                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1661                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1662             {
1663                 TRACE("Using FBO blit.\n");
1664
1665                 surface_blt_fbo(device, filter,
1666                         src_surface, src_surface->draw_binding, &src_rect,
1667                         dst_surface, dst_surface->draw_binding, &dst_rect);
1668                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1669                 return WINED3D_OK;
1670             }
1671
1672             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1673                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1674                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1675             {
1676                 TRACE("Using arbfp blit.\n");
1677
1678                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1679                     return WINED3D_OK;
1680             }
1681         }
1682     }
1683
1684 fallback:
1685
1686     /* Special cases for render targets. */
1687     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1688             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1689     {
1690         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1691                 src_surface, &src_rect, flags, fx, filter)))
1692             return WINED3D_OK;
1693     }
1694
1695 cpu:
1696
1697     /* For the rest call the X11 surface implementation. For render targets
1698      * this should be implemented OpenGL accelerated in BltOverride, other
1699      * blits are rather rare. */
1700     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1701 }
1702
1703 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1704         struct wined3d_surface *render_target)
1705 {
1706     TRACE("surface %p, render_target %p.\n", surface, render_target);
1707
1708     /* TODO: Check surface sizes, pools, etc. */
1709
1710     if (render_target->resource.multisample_type)
1711         return WINED3DERR_INVALIDCALL;
1712
1713     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1714 }
1715
1716 /* Context activation is done by the caller. */
1717 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1718 {
1719     if (surface->flags & SFLAG_DIBSECTION)
1720     {
1721         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1722     }
1723     else
1724     {
1725         if (!surface->resource.heapMemory)
1726             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1727         else if (!(surface->flags & SFLAG_CLIENT))
1728             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1729                     surface, surface->resource.heapMemory, surface->flags);
1730
1731         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1732                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1733     }
1734
1735     ENTER_GL();
1736     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1737     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1738     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1739             surface->resource.size, surface->resource.allocatedMemory));
1740     checkGLcall("glGetBufferSubDataARB");
1741     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1742     checkGLcall("glDeleteBuffersARB");
1743     LEAVE_GL();
1744
1745     surface->pbo = 0;
1746     surface->flags &= ~SFLAG_PBO;
1747 }
1748
1749 /* Do not call while under the GL lock. */
1750 static void surface_unload(struct wined3d_resource *resource)
1751 {
1752     struct wined3d_surface *surface = surface_from_resource(resource);
1753     struct wined3d_renderbuffer_entry *entry, *entry2;
1754     struct wined3d_device *device = resource->device;
1755     const struct wined3d_gl_info *gl_info;
1756     struct wined3d_context *context;
1757
1758     TRACE("surface %p.\n", surface);
1759
1760     if (resource->pool == WINED3D_POOL_DEFAULT)
1761     {
1762         /* Default pool resources are supposed to be destroyed before Reset is called.
1763          * Implicit resources stay however. So this means we have an implicit render target
1764          * or depth stencil. The content may be destroyed, but we still have to tear down
1765          * opengl resources, so we cannot leave early.
1766          *
1767          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1768          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1769          * or the depth stencil into an FBO the texture or render buffer will be removed
1770          * and all flags get lost
1771          */
1772         if (!(surface->flags & SFLAG_PBO))
1773             surface_init_sysmem(surface);
1774         /* We also get here when the ddraw swapchain is destroyed, for example
1775          * for a mode switch. In this case this surface won't necessarily be
1776          * an implicit surface. We have to mark it lost so that the
1777          * application can restore it after the mode switch. */
1778         surface->flags |= SFLAG_LOST;
1779     }
1780     else
1781     {
1782         /* Load the surface into system memory */
1783         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1784         surface_modify_location(surface, surface->draw_binding, FALSE);
1785     }
1786     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1787     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1788     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1789
1790     context = context_acquire(device, NULL);
1791     gl_info = context->gl_info;
1792
1793     /* Destroy PBOs, but load them into real sysmem before */
1794     if (surface->flags & SFLAG_PBO)
1795         surface_remove_pbo(surface, gl_info);
1796
1797     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1798      * all application-created targets the application has to release the surface
1799      * before calling _Reset
1800      */
1801     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1802     {
1803         ENTER_GL();
1804         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1805         LEAVE_GL();
1806         list_remove(&entry->entry);
1807         HeapFree(GetProcessHeap(), 0, entry);
1808     }
1809     list_init(&surface->renderbuffers);
1810     surface->current_renderbuffer = NULL;
1811
1812     ENTER_GL();
1813
1814     /* If we're in a texture, the texture name belongs to the texture.
1815      * Otherwise, destroy it. */
1816     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1817     {
1818         glDeleteTextures(1, &surface->texture_name);
1819         surface->texture_name = 0;
1820         glDeleteTextures(1, &surface->texture_name_srgb);
1821         surface->texture_name_srgb = 0;
1822     }
1823     if (surface->rb_multisample)
1824     {
1825         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1826         surface->rb_multisample = 0;
1827     }
1828     if (surface->rb_resolved)
1829     {
1830         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1831         surface->rb_resolved = 0;
1832     }
1833
1834     LEAVE_GL();
1835
1836     context_release(context);
1837
1838     resource_unload(resource);
1839 }
1840
/* Resource operations table for surfaces. The initializer is positional and
 * has a single entry. */
1841 static const struct wined3d_resource_ops surface_resource_ops =
1842 {
1843     surface_unload, /* Tears down the surface's GL resources. */
1844 };
1845
/* Surface operations table built from the surface_* handlers. The initializer
 * is positional, in the same order as gdi_surface_ops: private setup, realize
 * palette, map, unmap. */
1846 static const struct wined3d_surface_ops surface_ops =
1847 {
1848     surface_private_setup,
1849     surface_realize_palette,
1850     surface_map,
1851     surface_unmap,
1852 };
1853
1854 /*****************************************************************************
1855  * Initializes the GDI surface, aka creates the DIB section we render to
1856  * The DIB section creation is done by calling GetDC, which will create the
1857  * section and releasing the dc to allow the app to use it. The dib section
1858  * will stay until the surface is released
1859  *
1860  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1861  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1862  * avoid confusion in the shared surface code.
1863  *
1864  * Returns:
1865  *  WINED3D_OK on success
1866  *  The return values of called methods on failure
1867  *
1868  *****************************************************************************/
1869 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1870 {
1871     HRESULT hr;
1872
1873     TRACE("surface %p.\n", surface);
1874
1875     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1876     {
1877         ERR("Overlays not yet supported by GDI surfaces.\n");
1878         return WINED3DERR_INVALIDCALL;
1879     }
1880
1881     /* Sysmem textures have memory already allocated - release it,
1882      * this avoids an unnecessary memcpy. */
1883     hr = surface_create_dib_section(surface);
1884     if (SUCCEEDED(hr))
1885     {
1886         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1887         surface->resource.heapMemory = NULL;
1888         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1889     }
1890
1891     /* We don't mind the nonpow2 stuff in GDI. */
1892     surface->pow2Width = surface->resource.width;
1893     surface->pow2Height = surface->resource.height;
1894
1895     return WINED3D_OK;
1896 }
1897
1898 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1899 {
1900     struct wined3d_palette *palette = surface->palette;
1901
1902     TRACE("surface %p.\n", surface);
1903
1904     if (!palette) return;
1905
1906     if (surface->flags & SFLAG_DIBSECTION)
1907     {
1908         RGBQUAD col[256];
1909         unsigned int i;
1910
1911         TRACE("Updating the DC's palette.\n");
1912
1913         for (i = 0; i < 256; ++i)
1914         {
1915             col[i].rgbRed = palette->palents[i].peRed;
1916             col[i].rgbGreen = palette->palents[i].peGreen;
1917             col[i].rgbBlue = palette->palents[i].peBlue;
1918             col[i].rgbReserved = 0;
1919         }
1920         SetDIBColorTable(surface->hDC, 0, 256, col);
1921     }
1922
1923     /* Update the image because of the palette change. Some games like e.g.
1924      * Red Alert call SetEntries a lot to implement fading. */
1925     /* Tell the swapchain to update the screen. */
1926     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1927     {
1928         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1929         if (surface == swapchain->front_buffer)
1930         {
1931             x11_copy_to_screen(swapchain, NULL);
1932         }
1933     }
1934 }
1935
1936 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1937 {
1938     TRACE("surface %p, rect %s, flags %#x.\n",
1939             surface, wine_dbgstr_rect(rect), flags);
1940
1941     if (!(surface->flags & SFLAG_DIBSECTION))
1942     {
1943         HRESULT hr;
1944
1945         /* This happens on gdi surfaces if the application set a user pointer
1946          * and resets it. Recreate the DIB section. */
1947         if (FAILED(hr = surface_create_dib_section(surface)))
1948         {
1949             ERR("Failed to create dib section, hr %#x.\n", hr);
1950             return;
1951         }
1952         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1953         surface->resource.heapMemory = NULL;
1954         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1955     }
1956 }
1957
1958 static void gdi_surface_unmap(struct wined3d_surface *surface)
1959 {
1960     TRACE("surface %p.\n", surface);
1961
1962     /* Tell the swapchain to update the screen. */
1963     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1964     {
1965         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1966         if (surface == swapchain->front_buffer)
1967         {
1968             x11_copy_to_screen(swapchain, &surface->lockedRect);
1969         }
1970     }
1971
1972     memset(&surface->lockedRect, 0, sizeof(RECT));
1973 }
1974
/* Surface operations table for GDI surfaces, built from the gdi_surface_*
 * handlers. The initializer is positional: private setup, realize palette,
 * map, unmap. */
1975 static const struct wined3d_surface_ops gdi_surface_ops =
1976 {
1977     gdi_surface_private_setup,
1978     gdi_surface_realize_palette,
1979     gdi_surface_map,
1980     gdi_surface_unmap,
1981 };
1982
1983 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
1984 {
1985     GLuint *name;
1986     DWORD flag;
1987
1988     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
1989
1990     if(srgb)
1991     {
1992         name = &surface->texture_name_srgb;
1993         flag = SFLAG_INSRGBTEX;
1994     }
1995     else
1996     {
1997         name = &surface->texture_name;
1998         flag = SFLAG_INTEXTURE;
1999     }
2000
2001     if (!*name && new_name)
2002     {
2003         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2004          * surface has no texture name yet. See if we can get rid of this. */
2005         if (surface->flags & flag)
2006         {
2007             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2008             surface_modify_location(surface, flag, FALSE);
2009         }
2010     }
2011
2012     *name = new_name;
2013     surface_force_reload(surface);
2014 }
2015
2016 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2017 {
2018     TRACE("surface %p, target %#x.\n", surface, target);
2019
2020     if (surface->texture_target != target)
2021     {
2022         if (target == GL_TEXTURE_RECTANGLE_ARB)
2023         {
2024             surface->flags &= ~SFLAG_NORMCOORD;
2025         }
2026         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2027         {
2028             surface->flags |= SFLAG_NORMCOORD;
2029         }
2030     }
2031     surface->texture_target = target;
2032     surface_force_reload(surface);
2033 }
2034
2035 /* Context activation is done by the caller. */
2036 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2037 {
2038     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2039
2040     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2041     {
2042         struct wined3d_texture *texture = surface->container.u.texture;
2043
2044         TRACE("Passing to container (%p).\n", texture);
2045         texture->texture_ops->texture_bind(texture, context, srgb);
2046     }
2047     else
2048     {
2049         if (surface->texture_level)
2050         {
2051             ERR("Standalone surface %p is non-zero texture level %u.\n",
2052                     surface, surface->texture_level);
2053         }
2054
2055         if (srgb)
2056             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2057
2058         ENTER_GL();
2059
2060         if (!surface->texture_name)
2061         {
2062             glGenTextures(1, &surface->texture_name);
2063             checkGLcall("glGenTextures");
2064
2065             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2066
2067             context_bind_texture(context, surface->texture_target, surface->texture_name);
2068             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2069             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2070             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2071             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2072             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2073             checkGLcall("glTexParameteri");
2074         }
2075         else
2076         {
2077             context_bind_texture(context, surface->texture_target, surface->texture_name);
2078         }
2079
2080         LEAVE_GL();
2081     }
2082 }
2083
2084 /* This call just downloads data, the caller is responsible for binding the
2085  * correct texture. */
2086 /* Context activation is done by the caller. */
2087 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2088 {
2089     const struct wined3d_format *format = surface->resource.format;
2090
2091     /* Only support read back of converted P8 surfaces. */
2092     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2093     {
2094         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2095         return;
2096     }
2097
2098     ENTER_GL();
2099
2100     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2101     {
2102         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2103                 surface, surface->texture_level, format->glFormat, format->glType,
2104                 surface->resource.allocatedMemory);
2105
2106         if (surface->flags & SFLAG_PBO)
2107         {
2108             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2109             checkGLcall("glBindBufferARB");
2110             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2111             checkGLcall("glGetCompressedTexImageARB");
2112             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2113             checkGLcall("glBindBufferARB");
2114         }
2115         else
2116         {
2117             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2118                     surface->texture_level, surface->resource.allocatedMemory));
2119             checkGLcall("glGetCompressedTexImageARB");
2120         }
2121
2122         LEAVE_GL();
2123     }
2124     else
2125     {
2126         void *mem;
2127         GLenum gl_format = format->glFormat;
2128         GLenum gl_type = format->glType;
2129         int src_pitch = 0;
2130         int dst_pitch = 0;
2131
2132         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2133         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2134         {
2135             gl_format = GL_ALPHA;
2136             gl_type = GL_UNSIGNED_BYTE;
2137         }
2138
2139         if (surface->flags & SFLAG_NONPOW2)
2140         {
2141             unsigned char alignment = surface->resource.device->surface_alignment;
2142             src_pitch = format->byte_count * surface->pow2Width;
2143             dst_pitch = wined3d_surface_get_pitch(surface);
2144             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2145             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2146         }
2147         else
2148         {
2149             mem = surface->resource.allocatedMemory;
2150         }
2151
2152         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2153                 surface, surface->texture_level, gl_format, gl_type, mem);
2154
2155         if (surface->flags & SFLAG_PBO)
2156         {
2157             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2158             checkGLcall("glBindBufferARB");
2159
2160             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2161             checkGLcall("glGetTexImage");
2162
2163             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2164             checkGLcall("glBindBufferARB");
2165         }
2166         else
2167         {
2168             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2169             checkGLcall("glGetTexImage");
2170         }
2171         LEAVE_GL();
2172
2173         if (surface->flags & SFLAG_NONPOW2)
2174         {
2175             const BYTE *src_data;
2176             BYTE *dst_data;
2177             UINT y;
2178             /*
2179              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2180              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2181              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2182              *
2183              * We're doing this...
2184              *
2185              * instead of boxing the texture :
2186              * |<-texture width ->|  -->pow2width|   /\
2187              * |111111111111111111|              |   |
2188              * |222 Texture 222222| boxed empty  | texture height
2189              * |3333 Data 33333333|              |   |
2190              * |444444444444444444|              |   \/
2191              * -----------------------------------   |
2192              * |     boxed  empty | boxed empty  | pow2height
2193              * |                  |              |   \/
2194              * -----------------------------------
2195              *
2196              *
2197              * we're repacking the data to the expected texture width
2198              *
2199              * |<-texture width ->|  -->pow2width|   /\
2200              * |111111111111111111222222222222222|   |
2201              * |222333333333333333333444444444444| texture height
2202              * |444444                           |   |
2203              * |                                 |   \/
2204              * |                                 |   |
2205              * |            empty                | pow2height
2206              * |                                 |   \/
2207              * -----------------------------------
2208              *
2209              * == is the same as
2210              *
2211              * |<-texture width ->|    /\
2212              * |111111111111111111|
2213              * |222222222222222222|texture height
2214              * |333333333333333333|
2215              * |444444444444444444|    \/
2216              * --------------------
2217              *
2218              * this also means that any references to allocatedMemory should work with the data as if were a
2219              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2220              *
2221              * internally the texture is still stored in a boxed format so any references to textureName will
2222              * get a boxed texture with width pow2width and not a texture of width resource.width.
2223              *
2224              * Performance should not be an issue, because applications normally do not lock the surfaces when
2225              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2226              * and doesn't have to be re-read. */
2227             src_data = mem;
2228             dst_data = surface->resource.allocatedMemory;
2229             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2230             for (y = 1; y < surface->resource.height; ++y)
2231             {
2232                 /* skip the first row */
2233                 src_data += src_pitch;
2234                 dst_data += dst_pitch;
2235                 memcpy(dst_data, src_data, dst_pitch);
2236             }
2237
2238             HeapFree(GetProcessHeap(), 0, mem);
2239         }
2240     }
2241
2242     /* Surface has now been downloaded */
2243     surface->flags |= SFLAG_INSYSMEM;
2244 }
2245
2246 /* This call just uploads data, the caller is responsible for binding the
2247  * correct texture. */
2248 /* Context activation is done by the caller. */
/* Uploads the "src_rect" region of the pixel data described by "data" into
 * the texture level identified by surface->texture_target /
 * surface->texture_level, placing it at "dst_point".
 *
 * surface:   Destination surface.
 * gl_info:   Used for GL extension calls and for the FBO_TEX_UPDATE quirk check.
 * format:    Format describing the layout of the source data.
 * src_rect:  Region of the source data to upload.
 * src_pitch: Byte distance between consecutive rows of the source data
 *            (between rows of blocks for compressed formats).
 * dst_point: Position of the region inside the destination texture level.
 * srgb:      Selects format->glGammaInternal; only consulted on the
 *            compressed path below.
 * data:      Source location: a buffer object id (0 for none) and an address. */
2249 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2250         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2251         BOOL srgb, const struct wined3d_bo_address *data)
2252 {
2253     UINT update_w = src_rect->right - src_rect->left;
2254     UINT update_h = src_rect->bottom - src_rect->top;
2255
2256     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2257             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2258             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2259
     /* A non-zero map count means the surface is mapped right now; flag it
      * with SFLAG_PIN_SYSMEM in that case. */
2260     if (surface->resource.map_count)
2261     {
2262         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2263         surface->flags |= SFLAG_PIN_SYSMEM;
2264     }
2265
     /* Height-scaled formats upload numerator / denominator times as many rows. */
2266     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2267     {
2268         update_h *= format->height_scale.numerator;
2269         update_h /= format->height_scale.denominator;
2270     }
2271
2272     ENTER_GL();
2273
     /* The source lives in a buffer object: bind it as the pixel unpack
      * buffer for the duration of the upload. It is unbound again below. */
2274     if (data->buffer_object)
2275     {
2276         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2277         checkGLcall("glBindBufferARB");
2278     }
2279
2280     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2281     {
         /* row_length: presumably the byte size of one row of blocks that is
          * update_w pixels wide -- TODO confirm against
          * wined3d_format_calculate_size().
          * row_count: number of block rows, rounded up. */
2282         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2283         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2284         const BYTE *addr = data->addr;
2285         GLenum internal;
2286
         /* Advance to the first block of the source rectangle. */
2287         addr += (src_rect->top / format->block_height) * src_pitch;
2288         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2289
         /* Same internal format selection as surface_allocate_surface(). */
2290         if (srgb)
2291             internal = format->glGammaInternal;
2292         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2293             internal = format->rtInternal;
2294         else
2295             internal = format->glInternal;
2296
2297         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2298                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2299                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2300
         /* Source rows are tightly packed: the whole region goes up in one call. */
2301         if (row_length == src_pitch)
2302         {
2303             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2304                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2305         }
2306         else
2307         {
2308             UINT row, y;
2309
2310             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2311              * can't use the unpack row length like below. */
             /* Upload one row of blocks per call, stepping the source by the
              * pitch and the destination by the block height. */
2312             for (row = 0, y = dst_point->y; row < row_count; ++row)
2313             {
2314                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2315                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2316                 y += format->block_height;
2317                 addr += src_pitch;
2318             }
2319         }
2320         checkGLcall("glCompressedTexSubImage2DARB");
2321     }
2322     else
2323     {
2324         const BYTE *addr = data->addr;
2325
         /* Advance to the top-left pixel of the source rectangle. */
2326         addr += src_rect->top * src_pitch;
2327         addr += src_rect->left * format->byte_count;
2328
2329         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2330                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2331                 update_w, update_h, format->glFormat, format->glType, addr);
2332
         /* The unpack row length is given in pixels, hence the division of
          * the byte pitch. It is reset to 0 right after the upload. */
2333         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2334         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2335                 update_w, update_h, format->glFormat, format->glType, addr);
2336         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2337         checkGLcall("glTexSubImage2D");
2338     }
2339
     /* Undo the unpack buffer binding made above. */
2340     if (data->buffer_object)
2341     {
2342         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2343         checkGLcall("glBindBufferARB");
2344     }
2345
2346     LEAVE_GL();
2347
2348     if (wined3d_settings.strict_draw_ordering)
2349         wglFlush();
2350
     /* With the FBO_TEX_UPDATE quirk, every context of the device is told
      * about the update through context_surface_update(). */
2351     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2352     {
2353         struct wined3d_device *device = surface->resource.device;
2354         unsigned int i;
2355
2356         for (i = 0; i < device->context_count; ++i)
2357         {
2358             context_surface_update(device->contexts[i], surface);
2359         }
2360     }
2361 }
2362
/* Copies "src_rect" of "src_surface" into the texture of "dst_surface" at
 * "dst_point".
 *
 * dst_point: May be NULL, in which case (0, 0) is used.
 * src_rect:  May be NULL, in which case the whole source surface is used.
 *
 * Returns WINED3DERR_INVALIDCALL when the formats differ, the destination
 * point is negative, the source rectangle is empty or has a negative origin,
 * the update does not fit into the destination, or a block based format is
 * updated with a rectangle whose size is not block aligned. When the
 * destination needs a format conversion the result of wined3d_surface_blt()
 * is returned, otherwise WINED3D_OK. */
2363 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2364         struct wined3d_surface *src_surface, const RECT *src_rect)
2365 {
2366     const struct wined3d_format *src_format;
2367     const struct wined3d_format *dst_format;
2368     const struct wined3d_gl_info *gl_info;
2369     enum wined3d_conversion_type convert;
2370     struct wined3d_context *context;
2371     struct wined3d_bo_address data;
2372     struct wined3d_format format;
2373     UINT update_w, update_h;
2374     UINT dst_w, dst_h;
2375     UINT src_w, src_h;
2376     UINT src_pitch;
2377     POINT p;
2378     RECT r;
2379
2380     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2381             dst_surface, wine_dbgstr_point(dst_point),
2382             src_surface, wine_dbgstr_rect(src_rect));
2383
2384     src_format = src_surface->resource.format;
2385     dst_format = dst_surface->resource.format;
2386
2387     if (src_format->id != dst_format->id)
2388     {
2389         WARN("Source and destination surfaces should have the same format.\n");
2390         return WINED3DERR_INVALIDCALL;
2391     }
2392
     /* Default to the destination origin. */
2393     if (!dst_point)
2394     {
2395         p.x = 0;
2396         p.y = 0;
2397         dst_point = &p;
2398     }
2399     else if (dst_point->x < 0 || dst_point->y < 0)
2400     {
2401         WARN("Invalid destination point.\n");
2402         return WINED3DERR_INVALIDCALL;
2403     }
2404
     /* Default to the full source surface. A caller supplied rectangle must
      * be non-empty and start at non-negative coordinates. */
2405     if (!src_rect)
2406     {
2407         r.left = 0;
2408         r.top = 0;
2409         r.right = src_surface->resource.width;
2410         r.bottom = src_surface->resource.height;
2411         src_rect = &r;
2412     }
2413     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2414             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2415     {
2416         WARN("Invalid source rectangle.\n");
2417         return WINED3DERR_INVALIDCALL;
2418     }
2419
2420     src_w = src_surface->resource.width;
2421     src_h = src_surface->resource.height;
2422
2423     dst_w = dst_surface->resource.width;
2424     dst_h = dst_surface->resource.height;
2425
2426     update_w = src_rect->right - src_rect->left;
2427     update_h = src_rect->bottom - src_rect->top;
2428
     /* The "update > dst" tests come first so that the unsigned
      * subtractions "dst - update" cannot wrap around. */
2429     if (update_w > dst_w || dst_point->x > dst_w - update_w
2430             || update_h > dst_h || dst_point->y > dst_h - update_h)
2431     {
2432         WARN("Destination out of bounds.\n");
2433         return WINED3DERR_INVALIDCALL;
2434     }
2435
2436     /* NPOT block sizes would be silly. */
     /* The "& (block size - 1)" tests rely on power of two block sizes. An
      * update whose size is not block aligned is only accepted when it
      * covers both surfaces completely, i.e. all four sizes match. */
2437     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2438             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2439             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2440     {
2441         WARN("Update rect not block-aligned.\n");
2442         return WINED3DERR_INVALIDCALL;
2443     }
2444
2445     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2446     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2447     if (convert != WINED3D_CT_NONE || format.convert)
2448     {
2449         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2450         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2451     }
2452
2453     context = context_acquire(dst_surface->resource.device, NULL);
2454     gl_info = context->gl_info;
2455
2456     /* Only load the surface for partial updates. For newly allocated texture
2457      * the texture wouldn't be the current location, and we'd upload zeroes
2458      * just to overwrite them again. */
2459     if (update_w == dst_w && update_h == dst_h)
2460         surface_prepare_texture(dst_surface, context, FALSE);
2461     else
2462         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2463     surface_bind(dst_surface, context, FALSE);
2464
     /* Describe the source data: the source surface's PBO (if any), its
      * allocatedMemory pointer and its pitch. */
2465     data.buffer_object = src_surface->pbo;
2466     data.addr = src_surface->resource.allocatedMemory;
2467     src_pitch = wined3d_surface_get_pitch(src_surface);
2468
2469     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2470
2471     invalidate_active_texture(dst_surface->resource.device, context);
2472
2473     context_release(context);
2474
     /* NOTE(review): presumably this marks the texture as the up to date
      * location of the destination -- confirm in surface_modify_location(). */
2475     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2476     return WINED3D_OK;
2477 }
2478
2479 /* This call just allocates the texture, the caller is responsible for binding
2480  * the correct texture. */
2481 /* Context activation is done by the caller. */
/* Specifies the GL texture image for surface->texture_level, using the
 * surface's pow2Width / pow2Height as dimensions.
 *
 * surface: Surface to allocate the texture level for.
 * gl_info: Used to check for APPLE_client_storage and for GL extension calls.
 * format:  Supplies the GL internal format, format and type.
 * srgb:    Selects format->glGammaInternal as the internal format. */
2482 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2483         const struct wined3d_format *format, BOOL srgb)
2484 {
     /* Despite its name, enable_client_storage records that client storage
      * was switched OFF below and has to be switched back on before leaving. */
2485     BOOL enable_client_storage = FALSE;
2486     GLsizei width = surface->pow2Width;
2487     GLsizei height = surface->pow2Height;
2488     const BYTE *mem = NULL;
2489     GLenum internal;
2490
     /* Same internal format selection as the compressed path of
      * surface_upload_data(). */
2491     if (srgb)
2492     {
2493         internal = format->glGammaInternal;
2494     }
2495     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2496     {
2497         internal = format->rtInternal;
2498     }
2499     else
2500     {
2501         internal = format->glInternal;
2502     }
2503
     /* Height-scaled formats allocate numerator / denominator times as many rows. */
2504     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2505     {
2506         height *= format->height_scale.numerator;
2507         height /= format->height_scale.denominator;
2508     }
2509
2510     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2511             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2512             internal, width, height, format->glFormat, format->glType);
2513
2514     ENTER_GL();
2515
2516     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2517     {
2518         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2519                 || !surface->resource.allocatedMemory)
2520         {
2521             /* In some cases we want to disable client storage.
2522              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2523              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2524              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2525              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2526              */
2527             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2528             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2529             surface->flags &= ~SFLAG_CLIENT;
2530             enable_client_storage = TRUE;
2531         }
2532         else
2533         {
2534             surface->flags |= SFLAG_CLIENT;
2535
2536             /* Point OpenGL to our allocated texture memory. Do not use
2537              * resource.allocatedMemory here because it might point into a
2538              * PBO. Instead use heapMemory, but get the alignment right. */
             /* Rounds heapMemory up to the next multiple of RESOURCE_ALIGNMENT. */
2539             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2540                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2541         }
2542     }
2543
     /* "mem" is only non-NULL on the client storage path above. Compressed
      * formats without client memory fall through to glTexImage2D() with a
      * NULL data pointer. */
2544     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2545     {
2546         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2547                 internal, width, height, 0, surface->resource.size, mem));
2548         checkGLcall("glCompressedTexImage2DARB");
2549     }
2550     else
2551     {
2552         glTexImage2D(surface->texture_target, surface->texture_level,
2553                 internal, width, height, 0, format->glFormat, format->glType, mem);
2554         checkGLcall("glTexImage2D");
2555     }
2556
     /* Restore the client storage pixel store state that was disabled above. */
2557     if(enable_client_storage) {
2558         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2559         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2560     }
2561     LEAVE_GL();
2562 }
2563
2564 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2565  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2566 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2567 /* GL locking is done by the caller */
2568 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2569 {
2570     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2571     struct wined3d_renderbuffer_entry *entry;
2572     GLuint renderbuffer = 0;
2573     unsigned int src_width, src_height;
2574     unsigned int width, height;
2575
2576     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2577     {
2578         width = rt->pow2Width;
2579         height = rt->pow2Height;
2580     }
2581     else
2582     {
2583         width = surface->pow2Width;
2584         height = surface->pow2Height;
2585     }
2586
2587     src_width = surface->pow2Width;
2588     src_height = surface->pow2Height;
2589
2590     /* A depth stencil smaller than the render target is not valid */
2591     if (width > src_width || height > src_height) return;
2592
2593     /* Remove any renderbuffer set if the sizes match */
2594     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2595             || (width == src_width && height == src_height))
2596     {
2597         surface->current_renderbuffer = NULL;
2598         return;
2599     }
2600
2601     /* Look if we've already got a renderbuffer of the correct dimensions */
2602     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2603     {
2604         if (entry->width == width && entry->height == height)
2605         {
2606             renderbuffer = entry->id;
2607             surface->current_renderbuffer = entry;
2608             break;
2609         }
2610     }
2611
2612     if (!renderbuffer)
2613     {
2614         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2615         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2616         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2617                 surface->resource.format->glInternal, width, height);
2618
2619         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2620         entry->width = width;
2621         entry->height = height;
2622         entry->id = renderbuffer;
2623         list_add_head(&surface->renderbuffers, &entry->entry);
2624
2625         surface->current_renderbuffer = entry;
2626     }
2627
2628     checkGLcall("set_compatible_renderbuffer");
2629 }
2630
2631 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2632 {
2633     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2634
2635     TRACE("surface %p.\n", surface);
2636
2637     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2638     {
2639         ERR("Surface %p is not on a swapchain.\n", surface);
2640         return GL_NONE;
2641     }
2642
2643     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2644     {
2645         if (swapchain->render_to_fbo)
2646         {
2647             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2648             return GL_COLOR_ATTACHMENT0;
2649         }
2650         TRACE("Returning GL_BACK\n");
2651         return GL_BACK;
2652     }
2653     else if (surface == swapchain->front_buffer)
2654     {
2655         TRACE("Returning GL_FRONT\n");
2656         return GL_FRONT;
2657     }
2658
2659     FIXME("Higher back buffer, returning GL_BACK\n");
2660     return GL_BACK;
2661 }
2662
2663 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2664 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2665 {
2666     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2667
2668     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2669         /* No partial locking for textures yet. */
2670         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2671
2672     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2673     if (dirty_rect)
2674     {
2675         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2676         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2677         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2678         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2679     }
2680     else
2681     {
2682         surface->dirtyRect.left = 0;
2683         surface->dirtyRect.top = 0;
2684         surface->dirtyRect.right = surface->resource.width;
2685         surface->dirtyRect.bottom = surface->resource.height;
2686     }
2687
2688     /* if the container is a texture then mark it dirty. */
2689     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2690     {
2691         TRACE("Passing to container.\n");
2692         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2693     }
2694 }
2695
2696 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2697 {
2698     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2699     BOOL ck_changed;
2700
2701     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2702
2703     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2704     {
2705         ERR("Not supported on scratch surfaces.\n");
2706         return WINED3DERR_INVALIDCALL;
2707     }
2708
2709     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2710
2711     /* Reload if either the texture and sysmem have different ideas about the
2712      * color key, or the actual key values changed. */
2713     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2714             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2715             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2716     {
2717         TRACE("Reloading because of color keying\n");
2718         /* To perform the color key conversion we need a sysmem copy of
2719          * the surface. Make sure we have it. */
2720
2721         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2722         /* Make sure the texture is reloaded because of the color key change,
2723          * this kills performance though :( */
2724         /* TODO: This is not necessarily needed with hw palettized texture support. */
2725         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2726         /* Switching color keying on / off may change the internal format. */
2727         if (ck_changed)
2728             surface_force_reload(surface);
2729     }
2730     else if (!(surface->flags & flag))
2731     {
2732         TRACE("Reloading because surface is dirty.\n");
2733     }
2734     else
2735     {
2736         TRACE("surface is already in texture\n");
2737         return WINED3D_OK;
2738     }
2739
2740     /* No partial locking for textures yet. */
2741     surface_load_location(surface, flag, NULL);
2742     surface_evict_sysmem(surface);
2743
2744     return WINED3D_OK;
2745 }
2746
/* See also float_16_to_32() in wined3d_private.h */
/* Converts a 32-bit float to the bit pattern of a 16-bit half float
 * (1 sign bit, 5 exponent bits with a bias of 15, 10 mantissa bits).
 *
 * in: Pointer to the value to convert.
 *
 * Returns the half float bit pattern. Both zeroes give 0x0000, NaN gives
 * 0x7c01 and infinities give 0x7c00 / 0xfc00. Values too large for a half
 * float become infinity. Values too small for a normalized half float are
 * turned into denormals by dropping low mantissa bits, down to zero. */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The magnitude is scaled into [2^10, 2^11), so that its integer part is
     * the 11 bit significand: the implicit leading one plus 10 stored bits. */
    const float significand_min = 1024.0f;
    const float significand_end = 2048.0f;
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* At most one of these loops runs; "exp" tracks the applied scaling. */
    while (tmp < significand_min)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= significand_end)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 bit significand (2047.5 -> 2048).
     * Renormalize, otherwise the carry is lost when the mantissa is masked
     * below and e.g. 1.9995 would come out as 1.0 instead of 2.0. */
    if (mantissa == 2048)
    {
        mantissa >>= 1;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2809
2810 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2811 {
2812     ULONG refcount;
2813
2814     TRACE("Surface %p, container %p of type %#x.\n",
2815             surface, surface->container.u.base, surface->container.type);
2816
2817     switch (surface->container.type)
2818     {
2819         case WINED3D_CONTAINER_TEXTURE:
2820             return wined3d_texture_incref(surface->container.u.texture);
2821
2822         case WINED3D_CONTAINER_SWAPCHAIN:
2823             return wined3d_swapchain_incref(surface->container.u.swapchain);
2824
2825         default:
2826             ERR("Unhandled container type %#x.\n", surface->container.type);
2827         case WINED3D_CONTAINER_NONE:
2828             break;
2829     }
2830
2831     refcount = InterlockedIncrement(&surface->resource.ref);
2832     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2833
2834     return refcount;
2835 }
2836
2837 /* Do not call while under the GL lock. */
2838 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2839 {
2840     ULONG refcount;
2841
2842     TRACE("Surface %p, container %p of type %#x.\n",
2843             surface, surface->container.u.base, surface->container.type);
2844
2845     switch (surface->container.type)
2846     {
2847         case WINED3D_CONTAINER_TEXTURE:
2848             return wined3d_texture_decref(surface->container.u.texture);
2849
2850         case WINED3D_CONTAINER_SWAPCHAIN:
2851             return wined3d_swapchain_decref(surface->container.u.swapchain);
2852
2853         default:
2854             ERR("Unhandled container type %#x.\n", surface->container.type);
2855         case WINED3D_CONTAINER_NONE:
2856             break;
2857     }
2858
2859     refcount = InterlockedDecrement(&surface->resource.ref);
2860     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2861
2862     if (!refcount)
2863     {
2864         surface_cleanup(surface);
2865         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2866
2867         TRACE("Destroyed surface %p.\n", surface);
2868         HeapFree(GetProcessHeap(), 0, surface);
2869     }
2870
2871     return refcount;
2872 }
2873
2874 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2875 {
2876     return resource_set_priority(&surface->resource, priority);
2877 }
2878
2879 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2880 {
2881     return resource_get_priority(&surface->resource);
2882 }
2883
2884 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2885 {
2886     TRACE("surface %p.\n", surface);
2887
2888     if (!surface->resource.device->d3d_initialized)
2889     {
2890         ERR("D3D not initialized.\n");
2891         return;
2892     }
2893
2894     surface_internal_preload(surface, SRGB_ANY);
2895 }
2896
2897 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2898 {
2899     TRACE("surface %p.\n", surface);
2900
2901     return surface->resource.parent;
2902 }
2903
2904 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2905 {
2906     TRACE("surface %p.\n", surface);
2907
2908     return &surface->resource;
2909 }
2910
2911 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2912 {
2913     TRACE("surface %p, flags %#x.\n", surface, flags);
2914
2915     switch (flags)
2916     {
2917         case WINEDDGBS_CANBLT:
2918         case WINEDDGBS_ISBLTDONE:
2919             return WINED3D_OK;
2920
2921         default:
2922             return WINED3DERR_INVALIDCALL;
2923     }
2924 }
2925
2926 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2927 {
2928     TRACE("surface %p, flags %#x.\n", surface, flags);
2929
2930     /* XXX: DDERR_INVALIDSURFACETYPE */
2931
2932     switch (flags)
2933     {
2934         case WINEDDGFS_CANFLIP:
2935         case WINEDDGFS_ISFLIPDONE:
2936             return WINED3D_OK;
2937
2938         default:
2939             return WINED3DERR_INVALIDCALL;
2940     }
2941 }
2942
2943 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2944 {
2945     TRACE("surface %p.\n", surface);
2946
2947     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2948     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2949 }
2950
2951 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2952 {
2953     TRACE("surface %p.\n", surface);
2954
2955     surface->flags &= ~SFLAG_LOST;
2956     return WINED3D_OK;
2957 }
2958
2959 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2960 {
2961     TRACE("surface %p, palette %p.\n", surface, palette);
2962
2963     if (surface->palette == palette)
2964     {
2965         TRACE("Nop palette change.\n");
2966         return WINED3D_OK;
2967     }
2968
2969     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2970         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2971
2972     surface->palette = palette;
2973
2974     if (palette)
2975     {
2976         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2977             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2978
2979         surface->surface_ops->surface_realize_palette(surface);
2980     }
2981
2982     return WINED3D_OK;
2983 }
2984
2985 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2986         DWORD flags, const struct wined3d_color_key *color_key)
2987 {
2988     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2989
2990     if (flags & WINEDDCKEY_COLORSPACE)
2991     {
2992         FIXME(" colorkey value not supported (%08x) !\n", flags);
2993         return WINED3DERR_INVALIDCALL;
2994     }
2995
2996     /* Dirtify the surface, but only if a key was changed. */
2997     if (color_key)
2998     {
2999         switch (flags & ~WINEDDCKEY_COLORSPACE)
3000         {
3001             case WINEDDCKEY_DESTBLT:
3002                 surface->dst_blt_color_key = *color_key;
3003                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3004                 break;
3005
3006             case WINEDDCKEY_DESTOVERLAY:
3007                 surface->dst_overlay_color_key = *color_key;
3008                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3009                 break;
3010
3011             case WINEDDCKEY_SRCOVERLAY:
3012                 surface->src_overlay_color_key = *color_key;
3013                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3014                 break;
3015
3016             case WINEDDCKEY_SRCBLT:
3017                 surface->src_blt_color_key = *color_key;
3018                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3019                 break;
3020         }
3021     }
3022     else
3023     {
3024         switch (flags & ~WINEDDCKEY_COLORSPACE)
3025         {
3026             case WINEDDCKEY_DESTBLT:
3027                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3028                 break;
3029
3030             case WINEDDCKEY_DESTOVERLAY:
3031                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3032                 break;
3033
3034             case WINEDDCKEY_SRCOVERLAY:
3035                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3036                 break;
3037
3038             case WINEDDCKEY_SRCBLT:
3039                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3040                 break;
3041         }
3042     }
3043
3044     return WINED3D_OK;
3045 }
3046
3047 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3048 {
3049     TRACE("surface %p.\n", surface);
3050
3051     return surface->palette;
3052 }
3053
3054 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3055 {
3056     const struct wined3d_format *format = surface->resource.format;
3057     DWORD pitch;
3058
3059     TRACE("surface %p.\n", surface);
3060
3061     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3062     {
3063         /* Since compressed formats are block based, pitch means the amount of
3064          * bytes to the next row of block rather than the next row of pixels. */
3065         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3066         pitch = row_block_count * format->block_byte_count;
3067     }
3068     else
3069     {
3070         unsigned char alignment = surface->resource.device->surface_alignment;
3071         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3072         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3073     }
3074
3075     TRACE("Returning %u.\n", pitch);
3076
3077     return pitch;
3078 }
3079
3080 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3081 {
3082     TRACE("surface %p, mem %p.\n", surface, mem);
3083
3084     if (surface->resource.map_count || (surface->flags & SFLAG_DCINUSE))
3085     {
3086         WARN("Surface is mapped or the DC is in use.\n");
3087         return WINED3DERR_INVALIDCALL;
3088     }
3089
3090     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3091     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3092     {
3093         ERR("Not supported on render targets.\n");
3094         return WINED3DERR_INVALIDCALL;
3095     }
3096
3097     if (mem && mem != surface->resource.allocatedMemory)
3098     {
3099         void *release = NULL;
3100
3101         /* Do I have to copy the old surface content? */
3102         if (surface->flags & SFLAG_DIBSECTION)
3103         {
3104             DeleteDC(surface->hDC);
3105             DeleteObject(surface->dib.DIBsection);
3106             surface->dib.bitmap_data = NULL;
3107             surface->resource.allocatedMemory = NULL;
3108             surface->hDC = NULL;
3109             surface->flags &= ~SFLAG_DIBSECTION;
3110         }
3111         else if (!(surface->flags & SFLAG_USERPTR))
3112         {
3113             release = surface->resource.heapMemory;
3114             surface->resource.heapMemory = NULL;
3115         }
3116         surface->resource.allocatedMemory = mem;
3117         surface->flags |= SFLAG_USERPTR;
3118
3119         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3120         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3121
3122         /* For client textures OpenGL has to be notified. */
3123         if (surface->flags & SFLAG_CLIENT)
3124             surface_release_client_storage(surface);
3125
3126         /* Now free the old memory if any. */
3127         HeapFree(GetProcessHeap(), 0, release);
3128     }
3129     else if (surface->flags & SFLAG_USERPTR)
3130     {
3131         /* HeapMemory should be NULL already. */
3132         if (surface->resource.heapMemory)
3133             ERR("User pointer surface has heap memory allocated.\n");
3134
3135         if (!mem)
3136         {
3137             surface->resource.allocatedMemory = NULL;
3138             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3139
3140             if (surface->flags & SFLAG_CLIENT)
3141                 surface_release_client_storage(surface);
3142
3143             surface_prepare_system_memory(surface);
3144         }
3145
3146         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3147     }
3148
3149     return WINED3D_OK;
3150 }
3151
3152 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3153 {
3154     LONG w, h;
3155
3156     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3157
3158     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3159     {
3160         WARN("Not an overlay surface.\n");
3161         return WINEDDERR_NOTAOVERLAYSURFACE;
3162     }
3163
3164     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3165     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3166     surface->overlay_destrect.left = x;
3167     surface->overlay_destrect.top = y;
3168     surface->overlay_destrect.right = x + w;
3169     surface->overlay_destrect.bottom = y + h;
3170
3171     surface_draw_overlay(surface);
3172
3173     return WINED3D_OK;
3174 }
3175
3176 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3177 {
3178     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3179
3180     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3181     {
3182         TRACE("Not an overlay surface.\n");
3183         return WINEDDERR_NOTAOVERLAYSURFACE;
3184     }
3185
3186     if (!surface->overlay_dest)
3187     {
3188         TRACE("Overlay not visible.\n");
3189         *x = 0;
3190         *y = 0;
3191         return WINEDDERR_OVERLAYNOTVISIBLE;
3192     }
3193
3194     *x = surface->overlay_destrect.left;
3195     *y = surface->overlay_destrect.top;
3196
3197     TRACE("Returning position %d, %d.\n", *x, *y);
3198
3199     return WINED3D_OK;
3200 }
3201
3202 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3203         DWORD flags, struct wined3d_surface *ref)
3204 {
3205     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3206
3207     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3208     {
3209         TRACE("Not an overlay surface.\n");
3210         return WINEDDERR_NOTAOVERLAYSURFACE;
3211     }
3212
3213     return WINED3D_OK;
3214 }
3215
3216 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3217         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3218 {
3219     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3220             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3221
3222     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3223     {
3224         WARN("Not an overlay surface.\n");
3225         return WINEDDERR_NOTAOVERLAYSURFACE;
3226     }
3227     else if (!dst_surface)
3228     {
3229         WARN("Dest surface is NULL.\n");
3230         return WINED3DERR_INVALIDCALL;
3231     }
3232
3233     if (src_rect)
3234     {
3235         surface->overlay_srcrect = *src_rect;
3236     }
3237     else
3238     {
3239         surface->overlay_srcrect.left = 0;
3240         surface->overlay_srcrect.top = 0;
3241         surface->overlay_srcrect.right = surface->resource.width;
3242         surface->overlay_srcrect.bottom = surface->resource.height;
3243     }
3244
3245     if (dst_rect)
3246     {
3247         surface->overlay_destrect = *dst_rect;
3248     }
3249     else
3250     {
3251         surface->overlay_destrect.left = 0;
3252         surface->overlay_destrect.top = 0;
3253         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3254         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3255     }
3256
3257     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3258     {
3259         surface->overlay_dest = NULL;
3260         list_remove(&surface->overlay_entry);
3261     }
3262
3263     if (flags & WINEDDOVER_SHOW)
3264     {
3265         if (surface->overlay_dest != dst_surface)
3266         {
3267             surface->overlay_dest = dst_surface;
3268             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3269         }
3270     }
3271     else if (flags & WINEDDOVER_HIDE)
3272     {
3273         /* tests show that the rectangles are erased on hide */
3274         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3275         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3276         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3277         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3278         surface->overlay_dest = NULL;
3279     }
3280
3281     surface_draw_overlay(surface);
3282
3283     return WINED3D_OK;
3284 }
3285
3286 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3287         UINT width, UINT height, enum wined3d_format_id format_id,
3288         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3289 {
3290     struct wined3d_device *device = surface->resource.device;
3291     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3292     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3293     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3294
3295     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3296             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3297
3298     if (!resource_size)
3299         return WINED3DERR_INVALIDCALL;
3300
3301     if (device->d3d_initialized)
3302         surface->resource.resource_ops->resource_unload(&surface->resource);
3303
3304     if (surface->flags & SFLAG_DIBSECTION)
3305     {
3306         DeleteDC(surface->hDC);
3307         DeleteObject(surface->dib.DIBsection);
3308         surface->dib.bitmap_data = NULL;
3309         surface->flags &= ~SFLAG_DIBSECTION;
3310     }
3311
3312     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3313     surface->resource.allocatedMemory = NULL;
3314     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3315     surface->resource.heapMemory = NULL;
3316
3317     surface->resource.width = width;
3318     surface->resource.height = height;
3319     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3320             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3321     {
3322         surface->pow2Width = width;
3323         surface->pow2Height = height;
3324     }
3325     else
3326     {
3327         surface->pow2Width = surface->pow2Height = 1;
3328         while (surface->pow2Width < width)
3329             surface->pow2Width <<= 1;
3330         while (surface->pow2Height < height)
3331             surface->pow2Height <<= 1;
3332     }
3333
3334     if (surface->pow2Width != width || surface->pow2Height != height)
3335         surface->flags |= SFLAG_NONPOW2;
3336     else
3337         surface->flags &= ~SFLAG_NONPOW2;
3338
3339     surface->resource.format = format;
3340     surface->resource.multisample_type = multisample_type;
3341     surface->resource.multisample_quality = multisample_quality;
3342     surface->resource.size = resource_size;
3343
3344     if (!surface_init_sysmem(surface))
3345         return E_OUTOFMEMORY;
3346
3347     return WINED3D_OK;
3348 }
3349
3350 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3351         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3352 {
3353     unsigned short *dst_s;
3354     const float *src_f;
3355     unsigned int x, y;
3356
3357     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3358
3359     for (y = 0; y < h; ++y)
3360     {
3361         src_f = (const float *)(src + y * pitch_in);
3362         dst_s = (unsigned short *) (dst + y * pitch_out);
3363         for (x = 0; x < w; ++x)
3364         {
3365             dst_s[x] = float_32_to_16(src_f + x);
3366         }
3367     }
3368 }
3369
3370 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3371         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3372 {
3373     static const unsigned char convert_5to8[] =
3374     {
3375         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3376         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3377         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3378         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3379     };
3380     static const unsigned char convert_6to8[] =
3381     {
3382         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3383         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3384         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3385         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3386         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3387         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3388         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3389         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3390     };
3391     unsigned int x, y;
3392
3393     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3394
3395     for (y = 0; y < h; ++y)
3396     {
3397         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3398         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3399         for (x = 0; x < w; ++x)
3400         {
3401             WORD pixel = src_line[x];
3402             dst_line[x] = 0xff000000
3403                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3404                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3405                     | convert_5to8[(pixel & 0x001f)];
3406         }
3407     }
3408 }
3409
3410 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3411  * in both cases we're just setting the X / Alpha channel to 0xff. */
3412 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3413         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3414 {
3415     unsigned int x, y;
3416
3417     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3418
3419     for (y = 0; y < h; ++y)
3420     {
3421         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3422         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3423
3424         for (x = 0; x < w; ++x)
3425         {
3426             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3427         }
3428     }
3429 }
3430
3431 static inline BYTE cliptobyte(int x)
3432 {
3433     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3434 }
3435
3436 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3437         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3438 {
3439     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3440     unsigned int x, y;
3441
3442     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3443
3444     for (y = 0; y < h; ++y)
3445     {
3446         const BYTE *src_line = src + y * pitch_in;
3447         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3448         for (x = 0; x < w; ++x)
3449         {
3450             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3451              *     C = Y - 16; D = U - 128; E = V - 128;
3452              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3453              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3454              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3455              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3456              * U and V are shared between the pixels. */
3457             if (!(x & 1)) /* For every even pixel, read new U and V. */
3458             {
3459                 d = (int) src_line[1] - 128;
3460                 e = (int) src_line[3] - 128;
3461                 r2 = 409 * e + 128;
3462                 g2 = - 100 * d - 208 * e + 128;
3463                 b2 = 516 * d + 128;
3464             }
3465             c2 = 298 * ((int) src_line[0] - 16);
3466             dst_line[x] = 0xff000000
3467                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3468                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3469                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3470                 /* Scale RGB values to 0..255 range,
3471                  * then clip them if still not in range (may be negative),
3472                  * then shift them within DWORD if necessary. */
3473             src_line += 2;
3474         }
3475     }
3476 }
3477
3478 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3479         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3480 {
3481     unsigned int x, y;
3482     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3483
3484     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3485
3486     for (y = 0; y < h; ++y)
3487     {
3488         const BYTE *src_line = src + y * pitch_in;
3489         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3490         for (x = 0; x < w; ++x)
3491         {
3492             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3493              *     C = Y - 16; D = U - 128; E = V - 128;
3494              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3495              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3496              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3497              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3498              * U and V are shared between the pixels. */
3499             if (!(x & 1)) /* For every even pixel, read new U and V. */
3500             {
3501                 d = (int) src_line[1] - 128;
3502                 e = (int) src_line[3] - 128;
3503                 r2 = 409 * e + 128;
3504                 g2 = - 100 * d - 208 * e + 128;
3505                 b2 = 516 * d + 128;
3506             }
3507             c2 = 298 * ((int) src_line[0] - 16);
3508             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3509                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3510                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3511                 /* Scale RGB values to 0..255 range,
3512                  * then clip them if still not in range (may be negative),
3513                  * then shift them within DWORD if necessary. */
3514             src_line += 2;
3515         }
3516     }
3517 }
3518
3519 struct d3dfmt_convertor_desc
3520 {
3521     enum wined3d_format_id from, to;
3522     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3523 };
3524
3525 static const struct d3dfmt_convertor_desc convertors[] =
3526 {
3527     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3528     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3529     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3530     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3531     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3532     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3533 };
3534
3535 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3536         enum wined3d_format_id to)
3537 {
3538     unsigned int i;
3539
3540     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3541     {
3542         if (convertors[i].from == from && convertors[i].to == to)
3543             return &convertors[i];
3544     }
3545
3546     return NULL;
3547 }
3548
3549 /*****************************************************************************
3550  * surface_convert_format
3551  *
3552  * Creates a duplicate of a surface in a different format. Is used by Blt to
3553  * blit between surfaces with different formats.
3554  *
3555  * Parameters
3556  *  source: Source surface
3557  *  fmt: Requested destination format
3558  *
3559  *****************************************************************************/
3560 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3561 {
3562     struct wined3d_map_desc src_map, dst_map;
3563     const struct d3dfmt_convertor_desc *conv;
3564     struct wined3d_surface *ret = NULL;
3565     HRESULT hr;
3566
3567     conv = find_convertor(source->resource.format->id, to_fmt);
3568     if (!conv)
3569     {
3570         FIXME("Cannot find a conversion function from format %s to %s.\n",
3571                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3572         return NULL;
3573     }
3574
3575     wined3d_surface_create(source->resource.device, source->resource.width,
3576             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3D_POOL_SCRATCH,
3577             WINED3D_MULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3578             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3579             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3580     if (!ret)
3581     {
3582         ERR("Failed to create a destination surface for conversion.\n");
3583         return NULL;
3584     }
3585
3586     memset(&src_map, 0, sizeof(src_map));
3587     memset(&dst_map, 0, sizeof(dst_map));
3588
3589     hr = wined3d_surface_map(source, &src_map, NULL, WINED3DLOCK_READONLY);
3590     if (FAILED(hr))
3591     {
3592         ERR("Failed to lock the source surface.\n");
3593         wined3d_surface_decref(ret);
3594         return NULL;
3595     }
3596     hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3DLOCK_READONLY);
3597     if (FAILED(hr))
3598     {
3599         ERR("Failed to lock the destination surface.\n");
3600         wined3d_surface_unmap(source);
3601         wined3d_surface_decref(ret);
3602         return NULL;
3603     }
3604
3605     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3606             source->resource.width, source->resource.height);
3607
3608     wined3d_surface_unmap(ret);
3609     wined3d_surface_unmap(source);
3610
3611     return ret;
3612 }
3613
3614 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3615         unsigned int bpp, UINT pitch, DWORD color)
3616 {
3617     BYTE *first;
3618     int x, y;
3619
3620     /* Do first row */
3621
3622 #define COLORFILL_ROW(type) \
3623 do { \
3624     type *d = (type *)buf; \
3625     for (x = 0; x < width; ++x) \
3626         d[x] = (type)color; \
3627 } while(0)
3628
3629     switch (bpp)
3630     {
3631         case 1:
3632             COLORFILL_ROW(BYTE);
3633             break;
3634
3635         case 2:
3636             COLORFILL_ROW(WORD);
3637             break;
3638
3639         case 3:
3640         {
3641             BYTE *d = buf;
3642             for (x = 0; x < width; ++x, d += 3)
3643             {
3644                 d[0] = (color      ) & 0xFF;
3645                 d[1] = (color >>  8) & 0xFF;
3646                 d[2] = (color >> 16) & 0xFF;
3647             }
3648             break;
3649         }
3650         case 4:
3651             COLORFILL_ROW(DWORD);
3652             break;
3653
3654         default:
3655             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3656             return WINED3DERR_NOTAVAILABLE;
3657     }
3658
3659 #undef COLORFILL_ROW
3660
3661     /* Now copy first row. */
3662     first = buf;
3663     for (y = 1; y < height; ++y)
3664     {
3665         buf += pitch;
3666         memcpy(buf, first, width * bpp);
3667     }
3668
3669     return WINED3D_OK;
3670 }
3671
3672 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3673 {
3674     TRACE("surface %p.\n", surface);
3675
3676     if (!surface->resource.map_count)
3677     {
3678         WARN("Trying to unmap unmapped surface.\n");
3679         return WINEDDERR_NOTLOCKED;
3680     }
3681     --surface->resource.map_count;
3682
3683     surface->surface_ops->surface_unmap(surface);
3684
3685     return WINED3D_OK;
3686 }
3687
3688 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3689         struct wined3d_map_desc *map_desc, const RECT *rect, DWORD flags)
3690 {
3691     const struct wined3d_format *format = surface->resource.format;
3692
3693     TRACE("surface %p, map_desc %p, rect %s, flags %#x.\n",
3694             surface, map_desc, wine_dbgstr_rect(rect), flags);
3695
3696     if (surface->resource.map_count)
3697     {
3698         WARN("Surface is already mapped.\n");
3699         return WINED3DERR_INVALIDCALL;
3700     }
3701     if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
3702             && rect && (rect->left || rect->top
3703             || rect->right != surface->resource.width
3704             || rect->bottom != surface->resource.height))
3705     {
3706         UINT width_mask = format->block_width - 1;
3707         UINT height_mask = format->block_height - 1;
3708
3709         if ((rect->left & width_mask) || (rect->right & width_mask)
3710                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3711         {
3712             WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3713                     wine_dbgstr_rect(rect), format->block_width, format->block_height);
3714
3715             if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3716                 return WINED3DERR_INVALIDCALL;
3717         }
3718     }
3719
3720     ++surface->resource.map_count;
3721
3722     if (!(surface->flags & SFLAG_LOCKABLE))
3723         WARN("Trying to lock unlockable surface.\n");
3724
3725     /* Performance optimization: Count how often a surface is mapped, if it is
3726      * mapped regularly do not throw away the system memory copy. This avoids
3727      * the need to download the surface from OpenGL all the time. The surface
3728      * is still downloaded if the OpenGL texture is changed. */
3729     if (!(surface->flags & SFLAG_DYNLOCK))
3730     {
3731         if (++surface->lockCount > MAXLOCKCOUNT)
3732         {
3733             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3734             surface->flags |= SFLAG_DYNLOCK;
3735         }
3736     }
3737
3738     surface->surface_ops->surface_map(surface, rect, flags);
3739
3740     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3741         map_desc->row_pitch = surface->resource.width * format->byte_count;
3742     else
3743         map_desc->row_pitch = wined3d_surface_get_pitch(surface);
3744     map_desc->slice_pitch = 0;
3745
3746     if (!rect)
3747     {
3748         map_desc->data = surface->resource.allocatedMemory;
3749         surface->lockedRect.left = 0;
3750         surface->lockedRect.top = 0;
3751         surface->lockedRect.right = surface->resource.width;
3752         surface->lockedRect.bottom = surface->resource.height;
3753     }
3754     else
3755     {
3756         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3757         {
3758             /* Compressed textures are block based, so calculate the offset of
3759              * the block that contains the top-left pixel of the locked rectangle. */
3760             map_desc->data = surface->resource.allocatedMemory
3761                     + ((rect->top / format->block_height) * map_desc->row_pitch)
3762                     + ((rect->left / format->block_width) * format->block_byte_count);
3763         }
3764         else
3765         {
3766             map_desc->data = surface->resource.allocatedMemory
3767                     + (map_desc->row_pitch * rect->top)
3768                     + (rect->left * format->byte_count);
3769         }
3770         surface->lockedRect.left = rect->left;
3771         surface->lockedRect.top = rect->top;
3772         surface->lockedRect.right = rect->right;
3773         surface->lockedRect.bottom = rect->bottom;
3774     }
3775
3776     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3777     TRACE("Returning memory %p, pitch %u.\n", map_desc->data, map_desc->row_pitch);
3778
3779     return WINED3D_OK;
3780 }
3781
3782 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3783 {
3784     struct wined3d_map_desc map;
3785     HRESULT hr;
3786
3787     TRACE("surface %p, dc %p.\n", surface, dc);
3788
3789     if (surface->flags & SFLAG_USERPTR)
3790     {
3791         ERR("Not supported on surfaces with application-provided memory.\n");
3792         return WINEDDERR_NODC;
3793     }
3794
3795     /* Give more detailed info for ddraw. */
3796     if (surface->flags & SFLAG_DCINUSE)
3797         return WINEDDERR_DCALREADYCREATED;
3798
3799     /* Can't GetDC if the surface is locked. */
3800     if (surface->resource.map_count)
3801         return WINED3DERR_INVALIDCALL;
3802
3803     /* Create a DIB section if there isn't a dc yet. */
3804     if (!surface->hDC)
3805     {
3806         if (surface->flags & SFLAG_CLIENT)
3807         {
3808             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3809             surface_release_client_storage(surface);
3810         }
3811         hr = surface_create_dib_section(surface);
3812         if (FAILED(hr))
3813             return WINED3DERR_INVALIDCALL;
3814
3815         /* Use the DIB section from now on if we are not using a PBO. */
3816         if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
3817         {
3818             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3819             surface->resource.heapMemory = NULL;
3820             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3821         }
3822     }
3823
3824     /* Map the surface. */
3825     hr = wined3d_surface_map(surface, &map, NULL, 0);
3826     if (FAILED(hr))
3827     {
3828         ERR("Map failed, hr %#x.\n", hr);
3829         return hr;
3830     }
3831
3832     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3833      * activates the allocatedMemory. */
3834     if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
3835         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3836
3837     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3838             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3839     {
3840         /* GetDC on palettized formats is unsupported in D3D9, and the method
3841          * is missing in D3D8, so this should only be used for DX <=7
3842          * surfaces (with non-device palettes). */
3843         const PALETTEENTRY *pal = NULL;
3844
3845         if (surface->palette)
3846         {
3847             pal = surface->palette->palents;
3848         }
3849         else
3850         {
3851             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3852             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3853
3854             if (dds_primary && dds_primary->palette)
3855                 pal = dds_primary->palette->palents;
3856         }
3857
3858         if (pal)
3859         {
3860             RGBQUAD col[256];
3861             unsigned int i;
3862
3863             for (i = 0; i < 256; ++i)
3864             {
3865                 col[i].rgbRed = pal[i].peRed;
3866                 col[i].rgbGreen = pal[i].peGreen;
3867                 col[i].rgbBlue = pal[i].peBlue;
3868                 col[i].rgbReserved = 0;
3869             }
3870             SetDIBColorTable(surface->hDC, 0, 256, col);
3871         }
3872     }
3873
3874     surface->flags |= SFLAG_DCINUSE;
3875
3876     *dc = surface->hDC;
3877     TRACE("Returning dc %p.\n", *dc);
3878
3879     return WINED3D_OK;
3880 }
3881
3882 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3883 {
3884     TRACE("surface %p, dc %p.\n", surface, dc);
3885
3886     if (!(surface->flags & SFLAG_DCINUSE))
3887         return WINEDDERR_NODC;
3888
3889     if (surface->hDC != dc)
3890     {
3891         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3892                 dc, surface->hDC);
3893         return WINEDDERR_NODC;
3894     }
3895
3896     /* Copy the contents of the DIB over to the PBO. */
3897     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
3898         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3899
3900     /* We locked first, so unlock now. */
3901     wined3d_surface_unmap(surface);
3902
3903     surface->flags &= ~SFLAG_DCINUSE;
3904
3905     return WINED3D_OK;
3906 }
3907
3908 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3909 {
3910     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3911
3912     if (flags)
3913     {
3914         static UINT once;
3915         if (!once++)
3916             FIXME("Ignoring flags %#x.\n", flags);
3917         else
3918             WARN("Ignoring flags %#x.\n", flags);
3919     }
3920
3921     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3922     {
3923         ERR("Not supported on swapchain surfaces.\n");
3924         return WINEDDERR_NOTFLIPPABLE;
3925     }
3926
3927     /* Flipping is only supported on render targets and overlays. */
3928     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3929     {
3930         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3931         return WINEDDERR_NOTFLIPPABLE;
3932     }
3933
3934     flip_surface(surface, override);
3935
3936     /* Update overlays if they're visible. */
3937     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3938         return surface_draw_overlay(surface);
3939
3940     return WINED3D_OK;
3941 }
3942
3943 /* Do not call while under the GL lock. */
3944 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3945 {
3946     struct wined3d_device *device = surface->resource.device;
3947
3948     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3949
3950     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3951     {
3952         struct wined3d_texture *texture = surface->container.u.texture;
3953
3954         TRACE("Passing to container (%p).\n", texture);
3955         texture->texture_ops->texture_preload(texture, srgb);
3956     }
3957     else
3958     {
3959         struct wined3d_context *context;
3960
3961         TRACE("(%p) : About to load surface\n", surface);
3962
3963         /* TODO: Use already acquired context when possible. */
3964         context = context_acquire(device, NULL);
3965
3966         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3967
3968         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3969         {
3970             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3971             GLclampf tmp;
3972             tmp = 0.9f;
3973             ENTER_GL();
3974             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3975             LEAVE_GL();
3976         }
3977
3978         context_release(context);
3979     }
3980 }
3981
3982 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3983 {
3984     if (!surface->resource.allocatedMemory)
3985     {
3986         if (!surface->resource.heapMemory)
3987         {
3988             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3989                     surface->resource.size + RESOURCE_ALIGNMENT)))
3990             {
3991                 ERR("Failed to allocate memory.\n");
3992                 return FALSE;
3993             }
3994         }
3995         else if (!(surface->flags & SFLAG_CLIENT))
3996         {
3997             ERR("Surface %p has heapMemory %p and flags %#x.\n",
3998                     surface, surface->resource.heapMemory, surface->flags);
3999         }
4000
4001         surface->resource.allocatedMemory =
4002             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
4003     }
4004     else
4005     {
4006         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
4007     }
4008
4009     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
4010
4011     return TRUE;
4012 }
4013
4014 /* Read the framebuffer back into the surface */
4015 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4016 {
4017     struct wined3d_device *device = surface->resource.device;
4018     const struct wined3d_gl_info *gl_info;
4019     struct wined3d_context *context;
4020     BYTE *mem;
4021     GLint fmt;
4022     GLint type;
4023     BYTE *row, *top, *bottom;
4024     int i;
4025     BOOL bpp;
4026     RECT local_rect;
4027     BOOL srcIsUpsideDown;
4028     GLint rowLen = 0;
4029     GLint skipPix = 0;
4030     GLint skipRow = 0;
4031
4032     context = context_acquire(device, surface);
4033     context_apply_blit_state(context, device);
4034     gl_info = context->gl_info;
4035
4036     ENTER_GL();
4037
4038     /* Select the correct read buffer, and give some debug output.
4039      * There is no need to keep track of the current read buffer or reset it, every part of the code
4040      * that reads sets the read buffer as desired.
4041      */
4042     if (surface_is_offscreen(surface))
4043     {
4044         /* Mapping the primary render target which is not on a swapchain.
4045          * Read from the back buffer. */
4046         TRACE("Mapping offscreen render target.\n");
4047         glReadBuffer(device->offscreenBuffer);
4048         srcIsUpsideDown = TRUE;
4049     }
4050     else
4051     {
4052         /* Onscreen surfaces are always part of a swapchain */
4053         GLenum buffer = surface_get_gl_buffer(surface);
4054         TRACE("Mapping %#x buffer.\n", buffer);
4055         glReadBuffer(buffer);
4056         checkGLcall("glReadBuffer");
4057         srcIsUpsideDown = FALSE;
4058     }
4059
4060     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4061     if (!rect)
4062     {
4063         local_rect.left = 0;
4064         local_rect.top = 0;
4065         local_rect.right = surface->resource.width;
4066         local_rect.bottom = surface->resource.height;
4067     }
4068     else
4069     {
4070         local_rect = *rect;
4071     }
4072     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4073
4074     switch (surface->resource.format->id)
4075     {
4076         case WINED3DFMT_P8_UINT:
4077         {
4078             if (primary_render_target_is_p8(device))
4079             {
4080                 /* In case of P8 render targets the index is stored in the alpha component */
4081                 fmt = GL_ALPHA;
4082                 type = GL_UNSIGNED_BYTE;
4083                 mem = dest;
4084                 bpp = surface->resource.format->byte_count;
4085             }
4086             else
4087             {
4088                 /* GL can't return palettized data, so read ARGB pixels into a
4089                  * separate block of memory and convert them into palettized format
4090                  * in software. Slow, but if the app means to use palettized render
4091                  * targets and locks it...
4092                  *
4093                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4094                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4095                  * for the color channels when palettizing the colors.
4096                  */
4097                 fmt = GL_RGB;
4098                 type = GL_UNSIGNED_BYTE;
4099                 pitch *= 3;
4100                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4101                 if (!mem)
4102                 {
4103                     ERR("Out of memory\n");
4104                     LEAVE_GL();
4105                     return;
4106                 }
4107                 bpp = surface->resource.format->byte_count * 3;
4108             }
4109         }
4110         break;
4111
4112         default:
4113             mem = dest;
4114             fmt = surface->resource.format->glFormat;
4115             type = surface->resource.format->glType;
4116             bpp = surface->resource.format->byte_count;
4117     }
4118
4119     if (surface->flags & SFLAG_PBO)
4120     {
4121         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4122         checkGLcall("glBindBufferARB");
4123         if (mem)
4124         {
4125             ERR("mem not null for pbo -- unexpected\n");
4126             mem = NULL;
4127         }
4128     }
4129
4130     /* Save old pixel store pack state */
4131     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4132     checkGLcall("glGetIntegerv");
4133     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4134     checkGLcall("glGetIntegerv");
4135     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4136     checkGLcall("glGetIntegerv");
4137
4138     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4139     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4140     checkGLcall("glPixelStorei");
4141     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4142     checkGLcall("glPixelStorei");
4143     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4144     checkGLcall("glPixelStorei");
4145
4146     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4147             local_rect.right - local_rect.left,
4148             local_rect.bottom - local_rect.top,
4149             fmt, type, mem);
4150     checkGLcall("glReadPixels");
4151
4152     /* Reset previous pixel store pack state */
4153     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4154     checkGLcall("glPixelStorei");
4155     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4156     checkGLcall("glPixelStorei");
4157     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4158     checkGLcall("glPixelStorei");
4159
4160     if (surface->flags & SFLAG_PBO)
4161     {
4162         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4163         checkGLcall("glBindBufferARB");
4164
4165         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4166          * to get a pointer to it and perform the flipping in software. This is a lot
4167          * faster than calling glReadPixels for each line. In case we want more speed
4168          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4169         if (!srcIsUpsideDown)
4170         {
4171             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4172             checkGLcall("glBindBufferARB");
4173
4174             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4175             checkGLcall("glMapBufferARB");
4176         }
4177     }
4178
4179     /* TODO: Merge this with the palettization loop below for P8 targets */
4180     if(!srcIsUpsideDown) {
4181         UINT len, off;
4182         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4183             Flip the lines in software */
4184         len = (local_rect.right - local_rect.left) * bpp;
4185         off = local_rect.left * bpp;
4186
4187         row = HeapAlloc(GetProcessHeap(), 0, len);
4188         if(!row) {
4189             ERR("Out of memory\n");
4190             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4191                 HeapFree(GetProcessHeap(), 0, mem);
4192             LEAVE_GL();
4193             return;
4194         }
4195
4196         top = mem + pitch * local_rect.top;
4197         bottom = mem + pitch * (local_rect.bottom - 1);
4198         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4199             memcpy(row, top + off, len);
4200             memcpy(top + off, bottom + off, len);
4201             memcpy(bottom + off, row, len);
4202             top += pitch;
4203             bottom -= pitch;
4204         }
4205         HeapFree(GetProcessHeap(), 0, row);
4206
4207         /* Unmap the temp PBO buffer */
4208         if (surface->flags & SFLAG_PBO)
4209         {
4210             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4211             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4212         }
4213     }
4214
4215     LEAVE_GL();
4216     context_release(context);
4217
4218     /* For P8 textures we need to perform an inverse palette lookup. This is
4219      * done by searching for a palette index which matches the RGB value.
4220      * Note this isn't guaranteed to work when there are multiple entries for
4221      * the same color but we have no choice. In case of P8 render targets,
4222      * the index is stored in the alpha component so no conversion is needed. */
4223     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4224     {
4225         const PALETTEENTRY *pal = NULL;
4226         DWORD width = pitch / 3;
4227         int x, y, c;
4228
4229         if (surface->palette)
4230         {
4231             pal = surface->palette->palents;
4232         }
4233         else
4234         {
4235             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4236             HeapFree(GetProcessHeap(), 0, mem);
4237             return;
4238         }
4239
4240         for(y = local_rect.top; y < local_rect.bottom; y++) {
4241             for(x = local_rect.left; x < local_rect.right; x++) {
4242                 /*                      start              lines            pixels      */
4243                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4244                 const BYTE *green = blue  + 1;
4245                 const BYTE *red = green + 1;
4246
4247                 for(c = 0; c < 256; c++) {
4248                     if(*red   == pal[c].peRed   &&
4249                        *green == pal[c].peGreen &&
4250                        *blue  == pal[c].peBlue)
4251                     {
4252                         *((BYTE *) dest + y * width + x) = c;
4253                         break;
4254                     }
4255                 }
4256             }
4257         }
4258         HeapFree(GetProcessHeap(), 0, mem);
4259     }
4260 }
4261
4262 /* Read the framebuffer contents into a texture. Note that this function
4263  * doesn't do any kind of flipping. Using this on an onscreen surface will
4264  * result in a flipped D3D texture. */
4265 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4266 {
4267     struct wined3d_device *device = surface->resource.device;
4268     struct wined3d_context *context;
4269
4270     context = context_acquire(device, surface);
4271     device_invalidate_state(device, STATE_FRAMEBUFFER);
4272
4273     surface_prepare_texture(surface, context, srgb);
4274     surface_bind_and_dirtify(surface, context, srgb);
4275
4276     TRACE("Reading back offscreen render target %p.\n", surface);
4277
4278     ENTER_GL();
4279
4280     if (surface_is_offscreen(surface))
4281         glReadBuffer(device->offscreenBuffer);
4282     else
4283         glReadBuffer(surface_get_gl_buffer(surface));
4284     checkGLcall("glReadBuffer");
4285
4286     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4287             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4288     checkGLcall("glCopyTexSubImage2D");
4289
4290     LEAVE_GL();
4291
4292     context_release(context);
4293 }
4294
4295 /* Context activation is done by the caller. */
4296 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4297         struct wined3d_context *context, BOOL srgb)
4298 {
4299     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4300     enum wined3d_conversion_type convert;
4301     struct wined3d_format format;
4302
4303     if (surface->flags & alloc_flag) return;
4304
4305     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4306     if (convert != WINED3D_CT_NONE || format.convert)
4307         surface->flags |= SFLAG_CONVERTED;
4308     else surface->flags &= ~SFLAG_CONVERTED;
4309
4310     surface_bind_and_dirtify(surface, context, srgb);
4311     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4312     surface->flags |= alloc_flag;
4313 }
4314
4315 /* Context activation is done by the caller. */
4316 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4317 {
4318     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4319     {
4320         struct wined3d_texture *texture = surface->container.u.texture;
4321         UINT sub_count = texture->level_count * texture->layer_count;
4322         UINT i;
4323
4324         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4325
4326         for (i = 0; i < sub_count; ++i)
4327         {
4328             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4329             surface_prepare_texture_internal(s, context, srgb);
4330         }
4331
4332         return;
4333     }
4334
4335     surface_prepare_texture_internal(surface, context, srgb);
4336 }
4337
4338 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4339 {
4340     if (multisample)
4341     {
4342         if (surface->rb_multisample)
4343             return;
4344
4345         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4346         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4347         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4348                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4349         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4350     }
4351     else
4352     {
4353         if (surface->rb_resolved)
4354             return;
4355
4356         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4357         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4358         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4359                 surface->pow2Width, surface->pow2Height);
4360         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4361     }
4362 }
4363
4364 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4365         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4366 {
4367     struct wined3d_device *device = surface->resource.device;
4368     UINT pitch = wined3d_surface_get_pitch(surface);
4369     const struct wined3d_gl_info *gl_info;
4370     struct wined3d_context *context;
4371     RECT local_rect;
4372     UINT w, h;
4373
4374     surface_get_rect(surface, rect, &local_rect);
4375
4376     mem += local_rect.top * pitch + local_rect.left * bpp;
4377     w = local_rect.right - local_rect.left;
4378     h = local_rect.bottom - local_rect.top;
4379
4380     /* Activate the correct context for the render target */
4381     context = context_acquire(device, surface);
4382     context_apply_blit_state(context, device);
4383     gl_info = context->gl_info;
4384
4385     ENTER_GL();
4386
4387     if (!surface_is_offscreen(surface))
4388     {
4389         GLenum buffer = surface_get_gl_buffer(surface);
4390         TRACE("Unlocking %#x buffer.\n", buffer);
4391         context_set_draw_buffer(context, buffer);
4392
4393         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4394         glPixelZoom(1.0f, -1.0f);
4395     }
4396     else
4397     {
4398         /* Primary offscreen render target */
4399         TRACE("Offscreen render target.\n");
4400         context_set_draw_buffer(context, device->offscreenBuffer);
4401
4402         glPixelZoom(1.0f, 1.0f);
4403     }
4404
4405     glRasterPos3i(local_rect.left, local_rect.top, 1);
4406     checkGLcall("glRasterPos3i");
4407
4408     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4409     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4410
4411     if (surface->flags & SFLAG_PBO)
4412     {
4413         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4414         checkGLcall("glBindBufferARB");
4415     }
4416
4417     glDrawPixels(w, h, fmt, type, mem);
4418     checkGLcall("glDrawPixels");
4419
4420     if (surface->flags & SFLAG_PBO)
4421     {
4422         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4423         checkGLcall("glBindBufferARB");
4424     }
4425
4426     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4427     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4428
4429     LEAVE_GL();
4430
4431     if (wined3d_settings.strict_draw_ordering
4432             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4433             && surface->container.u.swapchain->front_buffer == surface))
4434         wglFlush();
4435
4436     context_release(context);
4437 }
4438
4439 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck, BOOL use_texturing,
4440         struct wined3d_format *format, enum wined3d_conversion_type *conversion_type)
4441 {
4442     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4443     const struct wined3d_device *device = surface->resource.device;
4444     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4445     BOOL blit_supported = FALSE;
4446
4447     /* Copy the default values from the surface. Below we might perform fixups */
4448     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4449     *format = *surface->resource.format;
4450     *conversion_type = WINED3D_CT_NONE;
4451
4452     /* Ok, now look if we have to do any conversion */
4453     switch (surface->resource.format->id)
4454     {
4455         case WINED3DFMT_P8_UINT:
4456             /* Below the call to blit_supported is disabled for Wine 1.2
4457              * because the function isn't operating correctly yet. At the
4458              * moment 8-bit blits are handled in software and if certain GL
4459              * extensions are around, surface conversion is performed at
4460              * upload time. The blit_supported call recognizes it as a
4461              * destination fixup. This type of upload 'fixup' and 8-bit to
4462              * 8-bit blits need to be handled by the blit_shader.
4463              * TODO: get rid of this #if 0. */
4464 #if 0
4465             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4466                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4467                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4468 #endif
4469             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4470
4471             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4472              * texturing. Further also use conversion in case of color keying.
4473              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4474              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4475              * conflicts with this.
4476              */
4477             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4478                     || colorkey_active || !use_texturing)
4479             {
4480                 format->glFormat = GL_RGBA;
4481                 format->glInternal = GL_RGBA;
4482                 format->glType = GL_UNSIGNED_BYTE;
4483                 format->conv_byte_count = 4;
4484                 if (colorkey_active)
4485                     *conversion_type = WINED3D_CT_PALETTED_CK;
4486                 else
4487                     *conversion_type = WINED3D_CT_PALETTED;
4488             }
4489             break;
4490
4491         case WINED3DFMT_B2G3R3_UNORM:
4492             /* **********************
4493                 GL_UNSIGNED_BYTE_3_3_2
4494                 ********************** */
4495             if (colorkey_active) {
4496                 /* This texture format will never be used.. So do not care about color keying
4497                     up until the point in time it will be needed :-) */
4498                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4499             }
4500             break;
4501
4502         case WINED3DFMT_B5G6R5_UNORM:
4503             if (colorkey_active)
4504             {
4505                 *conversion_type = WINED3D_CT_CK_565;
4506                 format->glFormat = GL_RGBA;
4507                 format->glInternal = GL_RGB5_A1;
4508                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4509                 format->conv_byte_count = 2;
4510             }
4511             break;
4512
4513         case WINED3DFMT_B5G5R5X1_UNORM:
4514             if (colorkey_active)
4515             {
4516                 *conversion_type = WINED3D_CT_CK_5551;
4517                 format->glFormat = GL_BGRA;
4518                 format->glInternal = GL_RGB5_A1;
4519                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4520                 format->conv_byte_count = 2;
4521             }
4522             break;
4523
4524         case WINED3DFMT_B8G8R8_UNORM:
4525             if (colorkey_active)
4526             {
4527                 *conversion_type = WINED3D_CT_CK_RGB24;
4528                 format->glFormat = GL_RGBA;
4529                 format->glInternal = GL_RGBA8;
4530                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4531                 format->conv_byte_count = 4;
4532             }
4533             break;
4534
4535         case WINED3DFMT_B8G8R8X8_UNORM:
4536             if (colorkey_active)
4537             {
4538                 *conversion_type = WINED3D_CT_RGB32_888;
4539                 format->glFormat = GL_RGBA;
4540                 format->glInternal = GL_RGBA8;
4541                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4542                 format->conv_byte_count = 4;
4543             }
4544             break;
4545
4546         case WINED3DFMT_B8G8R8A8_UNORM:
4547             if (colorkey_active)
4548             {
4549                 *conversion_type = WINED3D_CT_CK_ARGB32;
4550                 format->conv_byte_count = 4;
4551             }
4552             break;
4553
4554         default:
4555             break;
4556     }
4557
4558     if (*conversion_type != WINED3D_CT_NONE)
4559     {
4560         format->rtInternal = format->glInternal;
4561         format->glGammaInternal = format->glInternal;
4562     }
4563
4564     return WINED3D_OK;
4565 }
4566
4567 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4568 {
4569     /* FIXME: Is this really how color keys are supposed to work? I think it
4570      * makes more sense to compare the individual channels. */
4571     return color >= color_key->color_space_low_value
4572             && color <= color_key->color_space_high_value;
4573 }
4574
4575 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4576 {
4577     const struct wined3d_device *device = surface->resource.device;
4578     const struct wined3d_palette *pal = surface->palette;
4579     BOOL index_in_alpha = FALSE;
4580     unsigned int i;
4581
4582     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4583      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4584      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4585      * duplicate entries. Store the color key in the unused alpha component to speed the
4586      * download up and to make conversion unneeded. */
4587     index_in_alpha = primary_render_target_is_p8(device);
4588
4589     if (!pal)
4590     {
4591         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4592         if (index_in_alpha)
4593         {
4594             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4595              * there's no palette at this time. */
4596             for (i = 0; i < 256; i++) table[i][3] = i;
4597         }
4598     }
4599     else
4600     {
4601         TRACE("Using surface palette %p\n", pal);
4602         /* Get the surface's palette */
4603         for (i = 0; i < 256; ++i)
4604         {
4605             table[i][0] = pal->palents[i].peRed;
4606             table[i][1] = pal->palents[i].peGreen;
4607             table[i][2] = pal->palents[i].peBlue;
4608
4609             /* When index_in_alpha is set the palette index is stored in the
4610              * alpha component. In case of a readback we can then read
4611              * GL_ALPHA. Color keying is handled in BltOverride using a
4612              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4613              * color key itself is passed to glAlphaFunc in other cases the
4614              * alpha component of pixels that should be masked away is set to 0. */
4615             if (index_in_alpha)
4616                 table[i][3] = i;
4617             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4618                 table[i][3] = 0x00;
4619             else if (pal->flags & WINEDDPCAPS_ALPHA)
4620                 table[i][3] = pal->palents[i].peFlags;
4621             else
4622                 table[i][3] = 0xFF;
4623         }
4624     }
4625 }
4626
4627 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width, UINT height,
4628         UINT outpitch, enum wined3d_conversion_type conversion_type, struct wined3d_surface *surface)
4629 {
4630     const BYTE *source;
4631     BYTE *dest;
4632
4633     TRACE("src %p, dst %p, pitch %u, width %u, height %u, outpitch %u, conversion_type %#x, surface %p.\n",
4634             src, dst, pitch, width, height, outpitch, conversion_type, surface);
4635
4636     switch (conversion_type)
4637     {
4638         case WINED3D_CT_NONE:
4639         {
4640             memcpy(dst, src, pitch * height);
4641             break;
4642         }
4643
4644         case WINED3D_CT_PALETTED:
4645         case WINED3D_CT_PALETTED_CK:
4646         {
4647             BYTE table[256][4];
4648             unsigned int x, y;
4649
4650             d3dfmt_p8_init_palette(surface, table, (conversion_type == WINED3D_CT_PALETTED_CK));
4651
4652             for (y = 0; y < height; y++)
4653             {
4654                 source = src + pitch * y;
4655                 dest = dst + outpitch * y;
4656                 /* This is an 1 bpp format, using the width here is fine */
4657                 for (x = 0; x < width; x++) {
4658                     BYTE color = *source++;
4659                     *dest++ = table[color][0];
4660                     *dest++ = table[color][1];
4661                     *dest++ = table[color][2];
4662                     *dest++ = table[color][3];
4663                 }
4664             }
4665         }
4666         break;
4667
4668         case WINED3D_CT_CK_565:
4669         {
4670             /* Converting the 565 format in 5551 packed to emulate color-keying.
4671
4672               Note : in all these conversion, it would be best to average the averaging
4673                       pixels to get the color of the pixel that will be color-keyed to
4674                       prevent 'color bleeding'. This will be done later on if ever it is
4675                       too visible.
4676
4677               Note2: Nvidia documents say that their driver does not support alpha + color keying
4678                      on the same surface and disables color keying in such a case
4679             */
4680             unsigned int x, y;
4681             const WORD *Source;
4682             WORD *Dest;
4683
4684             TRACE("Color keyed 565\n");
4685
4686             for (y = 0; y < height; y++) {
4687                 Source = (const WORD *)(src + y * pitch);
4688                 Dest = (WORD *) (dst + y * outpitch);
4689                 for (x = 0; x < width; x++ ) {
4690                     WORD color = *Source++;
4691                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4692                     if (!color_in_range(&surface->src_blt_color_key, color))
4693                         *Dest |= 0x0001;
4694                     Dest++;
4695                 }
4696             }
4697         }
4698         break;
4699
4700         case WINED3D_CT_CK_5551:
4701         {
4702             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4703             unsigned int x, y;
4704             const WORD *Source;
4705             WORD *Dest;
4706             TRACE("Color keyed 5551\n");
4707             for (y = 0; y < height; y++) {
4708                 Source = (const WORD *)(src + y * pitch);
4709                 Dest = (WORD *) (dst + y * outpitch);
4710                 for (x = 0; x < width; x++ ) {
4711                     WORD color = *Source++;
4712                     *Dest = color;
4713                     if (!color_in_range(&surface->src_blt_color_key, color))
4714                         *Dest |= (1 << 15);
4715                     else
4716                         *Dest &= ~(1 << 15);
4717                     Dest++;
4718                 }
4719             }
4720         }
4721         break;
4722
4723         case WINED3D_CT_CK_RGB24:
4724         {
4725             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4726             unsigned int x, y;
4727             for (y = 0; y < height; y++)
4728             {
4729                 source = src + pitch * y;
4730                 dest = dst + outpitch * y;
4731                 for (x = 0; x < width; x++) {
4732                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4733                     DWORD dstcolor = color << 8;
4734                     if (!color_in_range(&surface->src_blt_color_key, color))
4735                         dstcolor |= 0xff;
4736                     *(DWORD*)dest = dstcolor;
4737                     source += 3;
4738                     dest += 4;
4739                 }
4740             }
4741         }
4742         break;
4743
4744         case WINED3D_CT_RGB32_888:
4745         {
4746             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4747             unsigned int x, y;
4748             for (y = 0; y < height; y++)
4749             {
4750                 source = src + pitch * y;
4751                 dest = dst + outpitch * y;
4752                 for (x = 0; x < width; x++) {
4753                     DWORD color = 0xffffff & *(const DWORD*)source;
4754                     DWORD dstcolor = color << 8;
4755                     if (!color_in_range(&surface->src_blt_color_key, color))
4756                         dstcolor |= 0xff;
4757                     *(DWORD*)dest = dstcolor;
4758                     source += 4;
4759                     dest += 4;
4760                 }
4761             }
4762         }
4763         break;
4764
4765         case WINED3D_CT_CK_ARGB32:
4766         {
4767             unsigned int x, y;
4768             for (y = 0; y < height; ++y)
4769             {
4770                 source = src + pitch * y;
4771                 dest = dst + outpitch * y;
4772                 for (x = 0; x < width; ++x)
4773                 {
4774                     DWORD color = *(const DWORD *)source;
4775                     if (color_in_range(&surface->src_blt_color_key, color))
4776                         color &= ~0xff000000;
4777                     *(DWORD*)dest = color;
4778                     source += 4;
4779                     dest += 4;
4780                 }
4781             }
4782         }
4783         break;
4784
4785         default:
4786             ERR("Unsupported conversion type %#x.\n", conversion_type);
4787     }
4788     return WINED3D_OK;
4789 }
4790
4791 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4792 {
4793     /* Flip the surface contents */
4794     /* Flip the DC */
4795     {
4796         HDC tmp;
4797         tmp = front->hDC;
4798         front->hDC = back->hDC;
4799         back->hDC = tmp;
4800     }
4801
4802     /* Flip the DIBsection */
4803     {
4804         HBITMAP tmp = front->dib.DIBsection;
4805         front->dib.DIBsection = back->dib.DIBsection;
4806         back->dib.DIBsection = tmp;
4807     }
4808
4809     /* Flip the surface data */
4810     {
4811         void* tmp;
4812
4813         tmp = front->dib.bitmap_data;
4814         front->dib.bitmap_data = back->dib.bitmap_data;
4815         back->dib.bitmap_data = tmp;
4816
4817         tmp = front->resource.allocatedMemory;
4818         front->resource.allocatedMemory = back->resource.allocatedMemory;
4819         back->resource.allocatedMemory = tmp;
4820
4821         tmp = front->resource.heapMemory;
4822         front->resource.heapMemory = back->resource.heapMemory;
4823         back->resource.heapMemory = tmp;
4824     }
4825
4826     /* Flip the PBO */
4827     {
4828         GLuint tmp_pbo = front->pbo;
4829         front->pbo = back->pbo;
4830         back->pbo = tmp_pbo;
4831     }
4832
4833     /* Flip the opengl texture */
4834     {
4835         GLuint tmp;
4836
4837         tmp = back->texture_name;
4838         back->texture_name = front->texture_name;
4839         front->texture_name = tmp;
4840
4841         tmp = back->texture_name_srgb;
4842         back->texture_name_srgb = front->texture_name_srgb;
4843         front->texture_name_srgb = tmp;
4844
4845         tmp = back->rb_multisample;
4846         back->rb_multisample = front->rb_multisample;
4847         front->rb_multisample = tmp;
4848
4849         tmp = back->rb_resolved;
4850         back->rb_resolved = front->rb_resolved;
4851         front->rb_resolved = tmp;
4852
4853         resource_unload(&back->resource);
4854         resource_unload(&front->resource);
4855     }
4856
4857     {
4858         DWORD tmp_flags = back->flags;
4859         back->flags = front->flags;
4860         front->flags = tmp_flags;
4861     }
4862 }
4863
4864 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4865  * pixel copy calls. */
4866 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4867         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4868 {
4869     struct wined3d_device *device = dst_surface->resource.device;
4870     float xrel, yrel;
4871     UINT row;
4872     struct wined3d_context *context;
4873     BOOL upsidedown = FALSE;
4874     RECT dst_rect = *dst_rect_in;
4875
4876     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4877      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4878      */
4879     if(dst_rect.top > dst_rect.bottom) {
4880         UINT tmp = dst_rect.bottom;
4881         dst_rect.bottom = dst_rect.top;
4882         dst_rect.top = tmp;
4883         upsidedown = TRUE;
4884     }
4885
4886     context = context_acquire(device, src_surface);
4887     context_apply_blit_state(context, device);
4888     surface_internal_preload(dst_surface, SRGB_RGB);
4889     ENTER_GL();
4890
4891     /* Bind the target texture */
4892     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4893     if (surface_is_offscreen(src_surface))
4894     {
4895         TRACE("Reading from an offscreen target\n");
4896         upsidedown = !upsidedown;
4897         glReadBuffer(device->offscreenBuffer);
4898     }
4899     else
4900     {
4901         glReadBuffer(surface_get_gl_buffer(src_surface));
4902     }
4903     checkGLcall("glReadBuffer");
4904
4905     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4906     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4907
4908     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4909     {
4910         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4911
4912         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4913             ERR("Texture filtering not supported in direct blit.\n");
4914     }
4915     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4916             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4917     {
4918         ERR("Texture filtering not supported in direct blit\n");
4919     }
4920
4921     if (upsidedown
4922             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4923             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4924     {
4925         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4926
4927         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4928                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4929                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4930                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4931     }
4932     else
4933     {
4934         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4935         /* I have to process this row by row to swap the image,
4936          * otherwise it would be upside down, so stretching in y direction
4937          * doesn't cost extra time
4938          *
4939          * However, stretching in x direction can be avoided if not necessary
4940          */
4941         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4942             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4943             {
4944                 /* Well, that stuff works, but it's very slow.
4945                  * find a better way instead
4946                  */
4947                 UINT col;
4948
4949                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4950                 {
4951                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4952                             dst_rect.left + col /* x offset */, row /* y offset */,
4953                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4954                 }
4955             }
4956             else
4957             {
4958                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4959                         dst_rect.left /* x offset */, row /* y offset */,
4960                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4961             }
4962         }
4963     }
4964     checkGLcall("glCopyTexSubImage2D");
4965
4966     LEAVE_GL();
4967     context_release(context);
4968
4969     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4970      * path is never entered
4971      */
4972     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4973 }
4974
4975 /* Uses the hardware to stretch and flip the image */
4976 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4977         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4978 {
4979     struct wined3d_device *device = dst_surface->resource.device;
4980     struct wined3d_swapchain *src_swapchain = NULL;
4981     GLuint src, backup = 0;
4982     float left, right, top, bottom; /* Texture coordinates */
4983     UINT fbwidth = src_surface->resource.width;
4984     UINT fbheight = src_surface->resource.height;
4985     struct wined3d_context *context;
4986     GLenum drawBuffer = GL_BACK;
4987     GLenum texture_target;
4988     BOOL noBackBufferBackup;
4989     BOOL src_offscreen;
4990     BOOL upsidedown = FALSE;
4991     RECT dst_rect = *dst_rect_in;
4992
4993     TRACE("Using hwstretch blit\n");
4994     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4995     context = context_acquire(device, src_surface);
4996     context_apply_blit_state(context, device);
4997     surface_internal_preload(dst_surface, SRGB_RGB);
4998
4999     src_offscreen = surface_is_offscreen(src_surface);
5000     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
5001     if (!noBackBufferBackup && !src_surface->texture_name)
5002     {
5003         /* Get it a description */
5004         surface_internal_preload(src_surface, SRGB_RGB);
5005     }
5006     ENTER_GL();
5007
5008     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
5009      * This way we don't have to wait for the 2nd readback to finish to leave this function.
5010      */
5011     if (context->aux_buffers >= 2)
5012     {
5013         /* Got more than one aux buffer? Use the 2nd aux buffer */
5014         drawBuffer = GL_AUX1;
5015     }
5016     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
5017     {
5018         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5019         drawBuffer = GL_AUX0;
5020     }
5021
5022     if(noBackBufferBackup) {
5023         glGenTextures(1, &backup);
5024         checkGLcall("glGenTextures");
5025         context_bind_texture(context, GL_TEXTURE_2D, backup);
5026         texture_target = GL_TEXTURE_2D;
5027     } else {
5028         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5029          * we are reading from the back buffer, the backup can be used as source texture
5030          */
5031         texture_target = src_surface->texture_target;
5032         context_bind_texture(context, texture_target, src_surface->texture_name);
5033         glEnable(texture_target);
5034         checkGLcall("glEnable(texture_target)");
5035
5036         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5037         src_surface->flags &= ~SFLAG_INTEXTURE;
5038     }
5039
5040     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5041      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5042      */
5043     if(dst_rect.top > dst_rect.bottom) {
5044         UINT tmp = dst_rect.bottom;
5045         dst_rect.bottom = dst_rect.top;
5046         dst_rect.top = tmp;
5047         upsidedown = TRUE;
5048     }
5049
5050     if (src_offscreen)
5051     {
5052         TRACE("Reading from an offscreen target\n");
5053         upsidedown = !upsidedown;
5054         glReadBuffer(device->offscreenBuffer);
5055     }
5056     else
5057     {
5058         glReadBuffer(surface_get_gl_buffer(src_surface));
5059     }
5060
5061     /* TODO: Only back up the part that will be overwritten */
5062     glCopyTexSubImage2D(texture_target, 0,
5063                         0, 0 /* read offsets */,
5064                         0, 0,
5065                         fbwidth,
5066                         fbheight);
5067
5068     checkGLcall("glCopyTexSubImage2D");
5069
5070     /* No issue with overriding these - the sampler is dirty due to blit usage */
5071     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5072             wined3d_gl_mag_filter(magLookup, filter));
5073     checkGLcall("glTexParameteri");
5074     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5075             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5076     checkGLcall("glTexParameteri");
5077
5078     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5079         src_swapchain = src_surface->container.u.swapchain;
5080     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5081     {
5082         src = backup ? backup : src_surface->texture_name;
5083     }
5084     else
5085     {
5086         glReadBuffer(GL_FRONT);
5087         checkGLcall("glReadBuffer(GL_FRONT)");
5088
5089         glGenTextures(1, &src);
5090         checkGLcall("glGenTextures(1, &src)");
5091         context_bind_texture(context, GL_TEXTURE_2D, src);
5092
5093         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5094          * out for power of 2 sizes
5095          */
5096         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5097                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5098         checkGLcall("glTexImage2D");
5099         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5100                             0, 0 /* read offsets */,
5101                             0, 0,
5102                             fbwidth,
5103                             fbheight);
5104
5105         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5106         checkGLcall("glTexParameteri");
5107         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5108         checkGLcall("glTexParameteri");
5109
5110         glReadBuffer(GL_BACK);
5111         checkGLcall("glReadBuffer(GL_BACK)");
5112
5113         if(texture_target != GL_TEXTURE_2D) {
5114             glDisable(texture_target);
5115             glEnable(GL_TEXTURE_2D);
5116             texture_target = GL_TEXTURE_2D;
5117         }
5118     }
5119     checkGLcall("glEnd and previous");
5120
5121     left = src_rect->left;
5122     right = src_rect->right;
5123
5124     if (!upsidedown)
5125     {
5126         top = src_surface->resource.height - src_rect->top;
5127         bottom = src_surface->resource.height - src_rect->bottom;
5128     }
5129     else
5130     {
5131         top = src_surface->resource.height - src_rect->bottom;
5132         bottom = src_surface->resource.height - src_rect->top;
5133     }
5134
5135     if (src_surface->flags & SFLAG_NORMCOORD)
5136     {
5137         left /= src_surface->pow2Width;
5138         right /= src_surface->pow2Width;
5139         top /= src_surface->pow2Height;
5140         bottom /= src_surface->pow2Height;
5141     }
5142
5143     /* draw the source texture stretched and upside down. The correct surface is bound already */
5144     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5145     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5146
5147     context_set_draw_buffer(context, drawBuffer);
5148     glReadBuffer(drawBuffer);
5149
5150     glBegin(GL_QUADS);
5151         /* bottom left */
5152         glTexCoord2f(left, bottom);
5153         glVertex2i(0, 0);
5154
5155         /* top left */
5156         glTexCoord2f(left, top);
5157         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5158
5159         /* top right */
5160         glTexCoord2f(right, top);
5161         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5162
5163         /* bottom right */
5164         glTexCoord2f(right, bottom);
5165         glVertex2i(dst_rect.right - dst_rect.left, 0);
5166     glEnd();
5167     checkGLcall("glEnd and previous");
5168
5169     if (texture_target != dst_surface->texture_target)
5170     {
5171         glDisable(texture_target);
5172         glEnable(dst_surface->texture_target);
5173         texture_target = dst_surface->texture_target;
5174     }
5175
5176     /* Now read the stretched and upside down image into the destination texture */
5177     context_bind_texture(context, texture_target, dst_surface->texture_name);
5178     glCopyTexSubImage2D(texture_target,
5179                         0,
5180                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5181                         0, 0, /* We blitted the image to the origin */
5182                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5183     checkGLcall("glCopyTexSubImage2D");
5184
5185     if(drawBuffer == GL_BACK) {
5186         /* Write the back buffer backup back */
5187         if(backup) {
5188             if(texture_target != GL_TEXTURE_2D) {
5189                 glDisable(texture_target);
5190                 glEnable(GL_TEXTURE_2D);
5191                 texture_target = GL_TEXTURE_2D;
5192             }
5193             context_bind_texture(context, GL_TEXTURE_2D, backup);
5194         }
5195         else
5196         {
5197             if (texture_target != src_surface->texture_target)
5198             {
5199                 glDisable(texture_target);
5200                 glEnable(src_surface->texture_target);
5201                 texture_target = src_surface->texture_target;
5202             }
5203             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5204         }
5205
5206         glBegin(GL_QUADS);
5207             /* top left */
5208             glTexCoord2f(0.0f, 0.0f);
5209             glVertex2i(0, fbheight);
5210
5211             /* bottom left */
5212             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5213             glVertex2i(0, 0);
5214
5215             /* bottom right */
5216             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5217                     (float)fbheight / (float)src_surface->pow2Height);
5218             glVertex2i(fbwidth, 0);
5219
5220             /* top right */
5221             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5222             glVertex2i(fbwidth, fbheight);
5223         glEnd();
5224     }
5225     glDisable(texture_target);
5226     checkGLcall("glDisable(texture_target)");
5227
5228     /* Cleanup */
5229     if (src != src_surface->texture_name && src != backup)
5230     {
5231         glDeleteTextures(1, &src);
5232         checkGLcall("glDeleteTextures(1, &src)");
5233     }
5234     if(backup) {
5235         glDeleteTextures(1, &backup);
5236         checkGLcall("glDeleteTextures(1, &backup)");
5237     }
5238
5239     LEAVE_GL();
5240
5241     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5242
5243     context_release(context);
5244
5245     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5246      * path is never entered
5247      */
5248     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5249 }
5250
5251 /* Front buffer coordinates are always full screen coordinates, but our GL
5252  * drawable is limited to the window's client area. The sysmem and texture
5253  * copies do have the full screen size. Note that GL has a bottom-left
5254  * origin, while D3D has a top-left origin. */
5255 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5256 {
5257     UINT drawable_height;
5258
5259     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5260             && surface == surface->container.u.swapchain->front_buffer)
5261     {
5262         POINT offset = {0, 0};
5263         RECT windowsize;
5264
5265         ScreenToClient(window, &offset);
5266         OffsetRect(rect, offset.x, offset.y);
5267
5268         GetClientRect(window, &windowsize);
5269         drawable_height = windowsize.bottom - windowsize.top;
5270     }
5271     else
5272     {
5273         drawable_height = surface->resource.height;
5274     }
5275
5276     rect->top = drawable_height - rect->top;
5277     rect->bottom = drawable_height - rect->bottom;
5278 }
5279
5280 static void surface_blt_to_drawable(const struct wined3d_device *device,
5281         enum wined3d_texture_filter_type filter, BOOL color_key,
5282         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5283         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5284 {
5285     struct wined3d_context *context;
5286     RECT src_rect, dst_rect;
5287
5288     src_rect = *src_rect_in;
5289     dst_rect = *dst_rect_in;
5290
5291     /* Make sure the surface is up-to-date. This should probably use
5292      * surface_load_location() and worry about the destination surface too,
5293      * unless we're overwriting it completely. */
5294     surface_internal_preload(src_surface, SRGB_RGB);
5295
5296     /* Activate the destination context, set it up for blitting */
5297     context = context_acquire(device, dst_surface);
5298     context_apply_blit_state(context, device);
5299
5300     if (!surface_is_offscreen(dst_surface))
5301         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5302
5303     device->blitter->set_shader(device->blit_priv, context, src_surface);
5304
5305     ENTER_GL();
5306
5307     if (color_key)
5308     {
5309         glEnable(GL_ALPHA_TEST);
5310         checkGLcall("glEnable(GL_ALPHA_TEST)");
5311
5312         /* When the primary render target uses P8, the alpha component
5313          * contains the palette index. Which means that the colorkey is one of
5314          * the palette entries. In other cases pixels that should be masked
5315          * away have alpha set to 0. */
5316         if (primary_render_target_is_p8(device))
5317             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5318         else
5319             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5320         checkGLcall("glAlphaFunc");
5321     }
5322     else
5323     {
5324         glDisable(GL_ALPHA_TEST);
5325         checkGLcall("glDisable(GL_ALPHA_TEST)");
5326     }
5327
5328     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5329
5330     if (color_key)
5331     {
5332         glDisable(GL_ALPHA_TEST);
5333         checkGLcall("glDisable(GL_ALPHA_TEST)");
5334     }
5335
5336     LEAVE_GL();
5337
5338     /* Leave the opengl state valid for blitting */
5339     device->blitter->unset_shader(context->gl_info);
5340
5341     if (wined3d_settings.strict_draw_ordering
5342             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5343             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5344         wglFlush(); /* Flush to ensure ordering across contexts. */
5345
5346     context_release(context);
5347 }
5348
5349 /* Do not call while under the GL lock. */
5350 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5351 {
5352     struct wined3d_device *device = s->resource.device;
5353     const struct blit_shader *blitter;
5354
5355     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5356             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5357     if (!blitter)
5358     {
5359         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5360         return WINED3DERR_INVALIDCALL;
5361     }
5362
5363     return blitter->color_fill(device, s, rect, color);
5364 }
5365
5366 /* Do not call while under the GL lock. */
5367 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5368         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5369         enum wined3d_texture_filter_type filter)
5370 {
5371     struct wined3d_device *device = dst_surface->resource.device;
5372     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5373     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5374
5375     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5376             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5377             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5378
5379     /* Get the swapchain. One of the surfaces has to be a primary surface */
5380     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5381     {
5382         WARN("Destination is in sysmem, rejecting gl blt\n");
5383         return WINED3DERR_INVALIDCALL;
5384     }
5385
5386     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5387         dstSwapchain = dst_surface->container.u.swapchain;
5388
5389     if (src_surface)
5390     {
5391         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5392         {
5393             WARN("Src is in sysmem, rejecting gl blt\n");
5394             return WINED3DERR_INVALIDCALL;
5395         }
5396
5397         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5398             srcSwapchain = src_surface->container.u.swapchain;
5399     }
5400
5401     /* Early sort out of cases where no render target is used */
5402     if (!dstSwapchain && !srcSwapchain
5403             && src_surface != device->fb.render_targets[0]
5404             && dst_surface != device->fb.render_targets[0])
5405     {
5406         TRACE("No surface is render target, not using hardware blit.\n");
5407         return WINED3DERR_INVALIDCALL;
5408     }
5409
5410     /* No destination color keying supported */
5411     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5412     {
5413         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5414         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5415         return WINED3DERR_INVALIDCALL;
5416     }
5417
5418     if (dstSwapchain && dstSwapchain == srcSwapchain)
5419     {
5420         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5421         return WINED3DERR_INVALIDCALL;
5422     }
5423
5424     if (dstSwapchain && srcSwapchain)
5425     {
5426         FIXME("Implement hardware blit between two different swapchains\n");
5427         return WINED3DERR_INVALIDCALL;
5428     }
5429
5430     if (dstSwapchain)
5431     {
5432         /* Handled with regular texture -> swapchain blit */
5433         if (src_surface == device->fb.render_targets[0])
5434             TRACE("Blit from active render target to a swapchain\n");
5435     }
5436     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5437     {
5438         FIXME("Implement blit from a swapchain to the active render target\n");
5439         return WINED3DERR_INVALIDCALL;
5440     }
5441
5442     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5443     {
5444         /* Blit from render target to texture */
5445         BOOL stretchx;
5446
5447         /* P8 read back is not implemented */
5448         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5449                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5450         {
5451             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5452             return WINED3DERR_INVALIDCALL;
5453         }
5454
5455         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5456         {
5457             TRACE("Color keying not supported by frame buffer to texture blit\n");
5458             return WINED3DERR_INVALIDCALL;
5459             /* Destination color key is checked above */
5460         }
5461
5462         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5463             stretchx = TRUE;
5464         else
5465             stretchx = FALSE;
5466
5467         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5468          * flip the image nor scale it.
5469          *
5470          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5471          * -> If the app wants a image width an unscaled width, copy it line per line
5472          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5473          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5474          *    back buffer. This is slower than reading line per line, thus not used for flipping
5475          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5476          *    pixel by pixel. */
5477         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5478                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5479         {
5480             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5481             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5482         }
5483         else
5484         {
5485             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5486             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5487         }
5488
5489         if (!dst_surface->resource.map_count && !(dst_surface->flags & SFLAG_DONOTFREE))
5490         {
5491             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5492             dst_surface->resource.allocatedMemory = NULL;
5493             dst_surface->resource.heapMemory = NULL;
5494         }
5495         else
5496         {
5497             dst_surface->flags &= ~SFLAG_INSYSMEM;
5498         }
5499
5500         return WINED3D_OK;
5501     }
5502     else if (src_surface)
5503     {
5504         /* Blit from offscreen surface to render target */
5505         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5506         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5507
5508         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5509
5510         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5511                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5512                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5513         {
5514             FIXME("Unsupported blit operation falling back to software\n");
5515             return WINED3DERR_INVALIDCALL;
5516         }
5517
5518         /* Color keying: Check if we have to do a color keyed blt,
5519          * and if not check if a color key is activated.
5520          *
5521          * Just modify the color keying parameters in the surface and restore them afterwards
5522          * The surface keeps track of the color key last used to load the opengl surface.
5523          * PreLoad will catch the change to the flags and color key and reload if necessary.
5524          */
5525         if (flags & WINEDDBLT_KEYSRC)
5526         {
5527             /* Use color key from surface */
5528         }
5529         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5530         {
5531             /* Use color key from DDBltFx */
5532             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5533             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5534         }
5535         else
5536         {
5537             /* Do not use color key */
5538             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5539         }
5540
5541         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5542                 src_surface, src_rect, dst_surface, dst_rect);
5543
5544         /* Restore the color key parameters */
5545         src_surface->CKeyFlags = oldCKeyFlags;
5546         src_surface->src_blt_color_key = old_blt_key;
5547
5548         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5549
5550         return WINED3D_OK;
5551     }
5552
5553     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5554     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5555     return WINED3DERR_INVALIDCALL;
5556 }
5557
5558 /* GL locking is done by the caller */
5559 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5560         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5561 {
5562     struct wined3d_device *device = surface->resource.device;
5563     const struct wined3d_gl_info *gl_info = context->gl_info;
5564     GLint compare_mode = GL_NONE;
5565     struct blt_info info;
5566     GLint old_binding = 0;
5567     RECT rect;
5568
5569     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5570
5571     glDisable(GL_CULL_FACE);
5572     glDisable(GL_BLEND);
5573     glDisable(GL_ALPHA_TEST);
5574     glDisable(GL_SCISSOR_TEST);
5575     glDisable(GL_STENCIL_TEST);
5576     glEnable(GL_DEPTH_TEST);
5577     glDepthFunc(GL_ALWAYS);
5578     glDepthMask(GL_TRUE);
5579     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5580     glViewport(x, y, w, h);
5581     glDepthRange(0.0, 1.0);
5582
5583     SetRect(&rect, 0, h, w, 0);
5584     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5585     context_active_texture(context, context->gl_info, 0);
5586     glGetIntegerv(info.binding, &old_binding);
5587     glBindTexture(info.bind_target, texture);
5588     if (gl_info->supported[ARB_SHADOW])
5589     {
5590         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5591         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5592     }
5593
5594     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5595             gl_info, info.tex_type, &surface->ds_current_size);
5596
5597     glBegin(GL_TRIANGLE_STRIP);
5598     glTexCoord3fv(info.coords[0]);
5599     glVertex2f(-1.0f, -1.0f);
5600     glTexCoord3fv(info.coords[1]);
5601     glVertex2f(1.0f, -1.0f);
5602     glTexCoord3fv(info.coords[2]);
5603     glVertex2f(-1.0f, 1.0f);
5604     glTexCoord3fv(info.coords[3]);
5605     glVertex2f(1.0f, 1.0f);
5606     glEnd();
5607
5608     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5609     glBindTexture(info.bind_target, old_binding);
5610
5611     glPopAttrib();
5612
5613     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5614 }
5615
5616 void surface_modify_ds_location(struct wined3d_surface *surface,
5617         DWORD location, UINT w, UINT h)
5618 {
5619     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5620
5621     if (location & ~(SFLAG_LOCATIONS | SFLAG_DISCARDED))
5622         FIXME("Invalid location (%#x) specified.\n", location);
5623
5624     if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5625             || (!(surface->flags & SFLAG_INTEXTURE) && (location & SFLAG_INTEXTURE)))
5626     {
5627         if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5628         {
5629             TRACE("Passing to container.\n");
5630             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5631         }
5632     }
5633
5634     surface->ds_current_size.cx = w;
5635     surface->ds_current_size.cy = h;
5636     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_DISCARDED);
5637     surface->flags |= location;
5638 }
5639
5640 /* Context activation is done by the caller. */
5641 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5642 {
5643     struct wined3d_device *device = surface->resource.device;
5644     GLsizei w, h;
5645
5646     TRACE("surface %p, new location %#x.\n", surface, location);
5647
5648     /* TODO: Make this work for modes other than FBO */
5649     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5650
5651     if (!(surface->flags & location))
5652     {
5653         w = surface->ds_current_size.cx;
5654         h = surface->ds_current_size.cy;
5655         surface->ds_current_size.cx = 0;
5656         surface->ds_current_size.cy = 0;
5657     }
5658     else
5659     {
5660         w = surface->resource.width;
5661         h = surface->resource.height;
5662     }
5663
5664     if (surface->ds_current_size.cx == surface->resource.width
5665             && surface->ds_current_size.cy == surface->resource.height)
5666     {
5667         TRACE("Location (%#x) is already up to date.\n", location);
5668         return;
5669     }
5670
5671     if (surface->current_renderbuffer)
5672     {
5673         FIXME("Not supported with fixed up depth stencil.\n");
5674         return;
5675     }
5676
5677     if (surface->flags & SFLAG_DISCARDED)
5678     {
5679         TRACE("Surface was discarded, no need copy data.\n");
5680         switch (location)
5681         {
5682             case SFLAG_INTEXTURE:
5683                 surface_prepare_texture(surface, context, FALSE);
5684                 break;
5685             case SFLAG_INRB_MULTISAMPLE:
5686                 surface_prepare_rb(surface, context->gl_info, TRUE);
5687                 break;
5688             case SFLAG_INDRAWABLE:
5689                 /* Nothing to do */
5690                 break;
5691             default:
5692                 FIXME("Unhandled location %#x\n", location);
5693         }
5694         surface->flags &= ~SFLAG_DISCARDED;
5695         surface->flags |= location;
5696         surface->ds_current_size.cx = surface->resource.width;
5697         surface->ds_current_size.cy = surface->resource.height;
5698         return;
5699     }
5700
5701     if (!(surface->flags & SFLAG_LOCATIONS))
5702     {
5703         FIXME("No up to date depth stencil location.\n");
5704         surface->flags |= location;
5705         surface->ds_current_size.cx = surface->resource.width;
5706         surface->ds_current_size.cy = surface->resource.height;
5707         return;
5708     }
5709
5710     if (location == SFLAG_INTEXTURE)
5711     {
5712         GLint old_binding = 0;
5713         GLenum bind_target;
5714
5715         /* The render target is allowed to be smaller than the depth/stencil
5716          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5717          * than the offscreen surface. Don't overwrite the offscreen surface
5718          * with undefined data. */
5719         w = min(w, context->swapchain->desc.backbuffer_width);
5720         h = min(h, context->swapchain->desc.backbuffer_height);
5721
5722         TRACE("Copying onscreen depth buffer to depth texture.\n");
5723
5724         ENTER_GL();
5725
5726         if (!device->depth_blt_texture)
5727         {
5728             glGenTextures(1, &device->depth_blt_texture);
5729         }
5730
5731         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5732          * directly on the FBO texture. That's because we need to flip. */
5733         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5734                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5735         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5736         {
5737             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5738             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5739         }
5740         else
5741         {
5742             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5743             bind_target = GL_TEXTURE_2D;
5744         }
5745         glBindTexture(bind_target, device->depth_blt_texture);
5746         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5747          * internal format, because the internal format might include stencil
5748          * data. In principle we should copy stencil data as well, but unless
5749          * the driver supports stencil export it's hard to do, and doesn't
5750          * seem to be needed in practice. If the hardware doesn't support
5751          * writing stencil data, the glCopyTexImage2D() call might trigger
5752          * software fallbacks. */
5753         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5754         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5755         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5756         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5757         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5758         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5759         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5760         glBindTexture(bind_target, old_binding);
5761
5762         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5763                 NULL, surface, SFLAG_INTEXTURE);
5764         context_set_draw_buffer(context, GL_NONE);
5765         glReadBuffer(GL_NONE);
5766
5767         /* Do the actual blit */
5768         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5769         checkGLcall("depth_blt");
5770
5771         context_invalidate_state(context, STATE_FRAMEBUFFER);
5772
5773         LEAVE_GL();
5774
5775         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5776     }
5777     else if (location == SFLAG_INDRAWABLE)
5778     {
5779         TRACE("Copying depth texture to onscreen depth buffer.\n");
5780
5781         ENTER_GL();
5782
5783         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5784                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5785         surface_depth_blt(surface, context, surface->texture_name,
5786                 0, surface->pow2Height - h, w, h, surface->texture_target);
5787         checkGLcall("depth_blt");
5788
5789         context_invalidate_state(context, STATE_FRAMEBUFFER);
5790
5791         LEAVE_GL();
5792
5793         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5794     }
5795     else
5796     {
5797         ERR("Invalid location (%#x) specified.\n", location);
5798     }
5799
5800     surface->flags |= location;
5801     surface->ds_current_size.cx = surface->resource.width;
5802     surface->ds_current_size.cy = surface->resource.height;
5803 }
5804
5805 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5806 {
5807     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5808     struct wined3d_surface *overlay;
5809
5810     TRACE("surface %p, location %s, persistent %#x.\n",
5811             surface, debug_surflocation(location), persistent);
5812
5813     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5814             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5815             && (location & SFLAG_INDRAWABLE))
5816         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5817
5818     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5819             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5820         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5821
5822     if (persistent)
5823     {
5824         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5825                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5826         {
5827             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5828             {
5829                 TRACE("Passing to container.\n");
5830                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5831             }
5832         }
5833         surface->flags &= ~SFLAG_LOCATIONS;
5834         surface->flags |= location;
5835
5836         /* Redraw emulated overlays, if any */
5837         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5838         {
5839             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5840             {
5841                 surface_draw_overlay(overlay);
5842             }
5843         }
5844     }
5845     else
5846     {
5847         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5848         {
5849             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5850             {
5851                 TRACE("Passing to container\n");
5852                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5853             }
5854         }
5855         surface->flags &= ~location;
5856     }
5857
5858     if (!(surface->flags & SFLAG_LOCATIONS))
5859     {
5860         ERR("Surface %p does not have any up to date location.\n", surface);
5861     }
5862 }
5863
5864 static DWORD resource_access_from_location(DWORD location)
5865 {
5866     switch (location)
5867     {
5868         case SFLAG_INSYSMEM:
5869             return WINED3D_RESOURCE_ACCESS_CPU;
5870
5871         case SFLAG_INDRAWABLE:
5872         case SFLAG_INSRGBTEX:
5873         case SFLAG_INTEXTURE:
5874         case SFLAG_INRB_MULTISAMPLE:
5875         case SFLAG_INRB_RESOLVED:
5876             return WINED3D_RESOURCE_ACCESS_GPU;
5877
5878         default:
5879             FIXME("Unhandled location %#x.\n", location);
5880             return 0;
5881     }
5882 }
5883
5884 static void surface_load_sysmem(struct wined3d_surface *surface,
5885         const struct wined3d_gl_info *gl_info, const RECT *rect)
5886 {
5887     surface_prepare_system_memory(surface);
5888
5889     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5890         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5891
5892     /* Download the surface to system memory. */
5893     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5894     {
5895         struct wined3d_device *device = surface->resource.device;
5896         struct wined3d_context *context;
5897
5898         /* TODO: Use already acquired context when possible. */
5899         context = context_acquire(device, NULL);
5900
5901         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5902         surface_download_data(surface, gl_info);
5903
5904         context_release(context);
5905
5906         return;
5907     }
5908
5909     if (surface->flags & SFLAG_INDRAWABLE)
5910     {
5911         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5912                 wined3d_surface_get_pitch(surface));
5913         return;
5914     }
5915
5916     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5917             surface, surface->flags & SFLAG_LOCATIONS);
5918 }
5919
5920 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5921         const struct wined3d_gl_info *gl_info, const RECT *rect)
5922 {
5923     struct wined3d_device *device = surface->resource.device;
5924     enum wined3d_conversion_type convert;
5925     struct wined3d_format format;
5926     UINT byte_count;
5927     BYTE *mem;
5928
5929     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5930     {
5931         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5932         return WINED3DERR_INVALIDCALL;
5933     }
5934
5935     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5936         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5937
5938     if (surface->flags & SFLAG_INTEXTURE)
5939     {
5940         RECT r;
5941
5942         surface_get_rect(surface, rect, &r);
5943         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5944
5945         return WINED3D_OK;
5946     }
5947
5948     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5949     {
5950         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5951          * path through sysmem. */
5952         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5953     }
5954
5955     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5956
5957     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5958      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5959      * called. */
5960     if ((convert != WINED3D_CT_NONE) && (surface->flags & SFLAG_PBO))
5961     {
5962         struct wined3d_context *context;
5963
5964         TRACE("Removing the pbo attached to surface %p.\n", surface);
5965
5966         /* TODO: Use already acquired context when possible. */
5967         context = context_acquire(device, NULL);
5968
5969         surface_remove_pbo(surface, gl_info);
5970
5971         context_release(context);
5972     }
5973
5974     if ((convert != WINED3D_CT_NONE) && surface->resource.allocatedMemory)
5975     {
5976         UINT height = surface->resource.height;
5977         UINT width = surface->resource.width;
5978         UINT src_pitch, dst_pitch;
5979
5980         byte_count = format.conv_byte_count;
5981         src_pitch = wined3d_surface_get_pitch(surface);
5982
5983         /* Stick to the alignment for the converted surface too, makes it
5984          * easier to load the surface. */
5985         dst_pitch = width * byte_count;
5986         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5987
5988         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5989         {
5990             ERR("Out of memory (%u).\n", dst_pitch * height);
5991             return E_OUTOFMEMORY;
5992         }
5993
5994         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5995                 src_pitch, width, height, dst_pitch, convert, surface);
5996
5997         surface->flags |= SFLAG_CONVERTED;
5998     }
5999     else
6000     {
6001         surface->flags &= ~SFLAG_CONVERTED;
6002         mem = surface->resource.allocatedMemory;
6003         byte_count = format.byte_count;
6004     }
6005
6006     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
6007
6008     /* Don't delete PBO memory. */
6009     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6010         HeapFree(GetProcessHeap(), 0, mem);
6011
6012     return WINED3D_OK;
6013 }
6014
6015 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6016         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6017 {
6018     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6019     struct wined3d_device *device = surface->resource.device;
6020     enum wined3d_conversion_type convert;
6021     struct wined3d_context *context;
6022     UINT width, src_pitch, dst_pitch;
6023     struct wined3d_bo_address data;
6024     struct wined3d_format format;
6025     POINT dst_point = {0, 0};
6026     BYTE *mem;
6027
6028     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6029             && surface_is_offscreen(surface)
6030             && (surface->flags & SFLAG_INDRAWABLE))
6031     {
6032         surface_load_fb_texture(surface, srgb);
6033
6034         return WINED3D_OK;
6035     }
6036
6037     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6038             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6039             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6040                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6041                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6042     {
6043         if (srgb)
6044             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6045                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6046         else
6047             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6048                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6049
6050         return WINED3D_OK;
6051     }
6052
6053     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6054             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6055             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6056                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6057                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6058     {
6059         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6060         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
6061         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6062
6063         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6064                 &rect, surface, dst_location, &rect);
6065
6066         return WINED3D_OK;
6067     }
6068
6069     /* Upload from system memory */
6070
6071     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6072             TRUE /* We will use textures */, &format, &convert);
6073
6074     if (srgb)
6075     {
6076         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6077         {
6078             /* Performance warning... */
6079             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6080             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6081         }
6082     }
6083     else
6084     {
6085         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6086         {
6087             /* Performance warning... */
6088             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6089             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6090         }
6091     }
6092
6093     if (!(surface->flags & SFLAG_INSYSMEM))
6094     {
6095         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6096         /* Lets hope we get it from somewhere... */
6097         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6098     }
6099
6100     /* TODO: Use already acquired context when possible. */
6101     context = context_acquire(device, NULL);
6102
6103     surface_prepare_texture(surface, context, srgb);
6104     surface_bind_and_dirtify(surface, context, srgb);
6105
6106     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6107     {
6108         surface->flags |= SFLAG_GLCKEY;
6109         surface->gl_color_key = surface->src_blt_color_key;
6110     }
6111     else surface->flags &= ~SFLAG_GLCKEY;
6112
6113     width = surface->resource.width;
6114     src_pitch = wined3d_surface_get_pitch(surface);
6115
6116     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6117      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6118      * called. */
6119     if ((convert != WINED3D_CT_NONE || format.convert) && (surface->flags & SFLAG_PBO))
6120     {
6121         TRACE("Removing the pbo attached to surface %p.\n", surface);
6122         surface_remove_pbo(surface, gl_info);
6123     }
6124
6125     if (format.convert)
6126     {
6127         /* This code is entered for texture formats which need a fixup. */
6128         UINT height = surface->resource.height;
6129
6130         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6131         dst_pitch = width * format.conv_byte_count;
6132         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6133
6134         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6135         {
6136             ERR("Out of memory (%u).\n", dst_pitch * height);
6137             context_release(context);
6138             return E_OUTOFMEMORY;
6139         }
6140         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6141         format.byte_count = format.conv_byte_count;
6142         src_pitch = dst_pitch;
6143     }
6144     else if (convert != WINED3D_CT_NONE && surface->resource.allocatedMemory)
6145     {
6146         /* This code is only entered for color keying fixups */
6147         UINT height = surface->resource.height;
6148
6149         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6150         dst_pitch = width * format.conv_byte_count;
6151         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6152
6153         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6154         {
6155             ERR("Out of memory (%u).\n", dst_pitch * height);
6156             context_release(context);
6157             return E_OUTOFMEMORY;
6158         }
6159         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6160                 width, height, dst_pitch, convert, surface);
6161         format.byte_count = format.conv_byte_count;
6162         src_pitch = dst_pitch;
6163     }
6164     else
6165     {
6166         mem = surface->resource.allocatedMemory;
6167     }
6168
6169     data.buffer_object = surface->pbo;
6170     data.addr = mem;
6171     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6172
6173     context_release(context);
6174
6175     /* Don't delete PBO memory. */
6176     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6177         HeapFree(GetProcessHeap(), 0, mem);
6178
6179     return WINED3D_OK;
6180 }
6181
6182 static void surface_multisample_resolve(struct wined3d_surface *surface)
6183 {
6184     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6185
6186     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6187         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6188
6189     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6190             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6191 }
6192
6193 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6194 {
6195     struct wined3d_device *device = surface->resource.device;
6196     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6197     HRESULT hr;
6198
6199     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6200
6201     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6202     {
6203         if (location == SFLAG_INTEXTURE)
6204         {
6205             struct wined3d_context *context = context_acquire(device, NULL);
6206             surface_load_ds_location(surface, context, location);
6207             context_release(context);
6208             return WINED3D_OK;
6209         }
6210         else
6211         {
6212             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6213             return WINED3DERR_INVALIDCALL;
6214         }
6215     }
6216
6217     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6218         location = SFLAG_INTEXTURE;
6219
6220     if (surface->flags & location)
6221     {
6222         TRACE("Location already up to date.\n");
6223
6224         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6225                 && surface_need_pbo(surface, gl_info))
6226             surface_load_pbo(surface, gl_info);
6227
6228         return WINED3D_OK;
6229     }
6230
6231     if (WARN_ON(d3d_surface))
6232     {
6233         DWORD required_access = resource_access_from_location(location);
6234         if ((surface->resource.access_flags & required_access) != required_access)
6235             WARN("Operation requires %#x access, but surface only has %#x.\n",
6236                     required_access, surface->resource.access_flags);
6237     }
6238
6239     if (!(surface->flags & SFLAG_LOCATIONS))
6240     {
6241         ERR("Surface %p does not have any up to date location.\n", surface);
6242         surface->flags |= SFLAG_LOST;
6243         return WINED3DERR_DEVICELOST;
6244     }
6245
6246     switch (location)
6247     {
6248         case SFLAG_INSYSMEM:
6249             surface_load_sysmem(surface, gl_info, rect);
6250             break;
6251
6252         case SFLAG_INDRAWABLE:
6253             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6254                 return hr;
6255             break;
6256
6257         case SFLAG_INRB_RESOLVED:
6258             surface_multisample_resolve(surface);
6259             break;
6260
6261         case SFLAG_INTEXTURE:
6262         case SFLAG_INSRGBTEX:
6263             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6264                 return hr;
6265             break;
6266
6267         default:
6268             ERR("Don't know how to handle location %#x.\n", location);
6269             break;
6270     }
6271
6272     if (!rect)
6273     {
6274         surface->flags |= location;
6275
6276         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6277             surface_evict_sysmem(surface);
6278     }
6279
6280     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6281             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6282     {
6283         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6284     }
6285
6286     return WINED3D_OK;
6287 }
6288
6289 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6290 {
6291     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6292
6293     /* Not on a swapchain - must be offscreen */
6294     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6295
6296     /* The front buffer is always onscreen */
6297     if (surface == swapchain->front_buffer) return FALSE;
6298
6299     /* If the swapchain is rendered to an FBO, the backbuffer is
6300      * offscreen, otherwise onscreen */
6301     return swapchain->render_to_fbo;
6302 }
6303
6304 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6307
6308 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6309 /* Context activation is done by the caller. */
6310 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6311 {
6312     BYTE table[256][4];
6313     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6314
6315     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6316
6317     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6318     ENTER_GL();
6319     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6320     LEAVE_GL();
6321 }
6322
6323 /* Context activation is done by the caller. */
6324 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6325 {
6326     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6327
6328     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6329      * else the surface is converted in software at upload time in LoadLocation.
6330      */
6331     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6332             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6333         ffp_blit_p8_upload_palette(surface, context->gl_info);
6334
6335     ENTER_GL();
6336     glEnable(surface->texture_target);
6337     checkGLcall("glEnable(surface->texture_target)");
6338     LEAVE_GL();
6339     return WINED3D_OK;
6340 }
6341
6342 /* Context activation is done by the caller. */
6343 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6344 {
6345     ENTER_GL();
6346     glDisable(GL_TEXTURE_2D);
6347     checkGLcall("glDisable(GL_TEXTURE_2D)");
6348     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6349     {
6350         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6351         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6352     }
6353     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6354     {
6355         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6356         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6357     }
6358     LEAVE_GL();
6359 }
6360
6361 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6362         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6363         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6364 {
6365     enum complex_fixup src_fixup;
6366
6367     switch (blit_op)
6368     {
6369         case WINED3D_BLIT_OP_COLOR_BLIT:
6370             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6371                 return FALSE;
6372
6373             src_fixup = get_complex_fixup(src_format->color_fixup);
6374             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6375             {
6376                 TRACE("Checking support for fixup:\n");
6377                 dump_color_fixup_desc(src_format->color_fixup);
6378             }
6379
6380             if (!is_identity_fixup(dst_format->color_fixup))
6381             {
6382                 TRACE("Destination fixups are not supported\n");
6383                 return FALSE;
6384             }
6385
6386             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6387             {
6388                 TRACE("P8 fixup supported\n");
6389                 return TRUE;
6390             }
6391
6392             /* We only support identity conversions. */
6393             if (is_identity_fixup(src_format->color_fixup))
6394             {
6395                 TRACE("[OK]\n");
6396                 return TRUE;
6397             }
6398
6399             TRACE("[FAILED]\n");
6400             return FALSE;
6401
6402         case WINED3D_BLIT_OP_COLOR_FILL:
6403             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6404                 return FALSE;
6405
6406             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6407             {
6408                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6409                     return FALSE;
6410             }
6411             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6412             {
6413                 TRACE("Color fill not supported\n");
6414                 return FALSE;
6415             }
6416
6417             /* FIXME: We should reject color fills on formats with fixups,
6418              * but this would break P8 color fills for example. */
6419
6420             return TRUE;
6421
6422         case WINED3D_BLIT_OP_DEPTH_FILL:
6423             return TRUE;
6424
6425         default:
6426             TRACE("Unsupported blit_op=%d\n", blit_op);
6427             return FALSE;
6428     }
6429 }
6430
6431 /* Do not call while under the GL lock. */
6432 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6433         const RECT *dst_rect, const struct wined3d_color *color)
6434 {
6435     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6436     struct wined3d_fb_state fb = {&dst_surface, NULL};
6437
6438     device_clear_render_targets(device, 1, &fb, 1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6439
6440     return WINED3D_OK;
6441 }
6442
6443 /* Do not call while under the GL lock. */
6444 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6445         struct wined3d_surface *surface, const RECT *rect, float depth)
6446 {
6447     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6448     struct wined3d_fb_state fb = {NULL, surface};
6449
6450     device_clear_render_targets(device, 0, &fb, 1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6451
6452     return WINED3D_OK;
6453 }
6454
6455 const struct blit_shader ffp_blit =  {
6456     ffp_blit_alloc,
6457     ffp_blit_free,
6458     ffp_blit_set,
6459     ffp_blit_unset,
6460     ffp_blit_supported,
6461     ffp_blit_color_fill,
6462     ffp_blit_depth_fill,
6463 };
6464
6465 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6466 {
6467     return WINED3D_OK;
6468 }
6469
/* Counterpart of cpu_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Intentionally empty. */
}
6474
6475 /* Context activation is done by the caller. */
6476 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6477 {
6478     return WINED3D_OK;
6479 }
6480
/* No-op counterpart of cpu_blit_set().
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* Intentionally empty. */
}
6485
6486 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6487         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6488         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6489 {
6490     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6491     {
6492         return TRUE;
6493     }
6494
6495     return FALSE;
6496 }
6497
6498 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6499         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6500         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6501 {
6502     UINT row_block_count;
6503     const BYTE *src_row;
6504     BYTE *dst_row;
6505     UINT x, y;
6506
6507     src_row = src_data;
6508     dst_row = dst_data;
6509
6510     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6511
6512     if (!flags)
6513     {
6514         for (y = 0; y < update_h; y += format->block_height)
6515         {
6516             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6517             src_row += src_pitch;
6518             dst_row += dst_pitch;
6519         }
6520
6521         return WINED3D_OK;
6522     }
6523
6524     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6525     {
6526         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6527
6528         switch (format->id)
6529         {
6530             case WINED3DFMT_DXT1:
6531                 for (y = 0; y < update_h; y += format->block_height)
6532                 {
6533                     struct block
6534                     {
6535                         WORD color[2];
6536                         BYTE control_row[4];
6537                     };
6538
6539                     const struct block *s = (const struct block *)src_row;
6540                     struct block *d = (struct block *)dst_row;
6541
6542                     for (x = 0; x < row_block_count; ++x)
6543                     {
6544                         d[x].color[0] = s[x].color[0];
6545                         d[x].color[1] = s[x].color[1];
6546                         d[x].control_row[0] = s[x].control_row[3];
6547                         d[x].control_row[1] = s[x].control_row[2];
6548                         d[x].control_row[2] = s[x].control_row[1];
6549                         d[x].control_row[3] = s[x].control_row[0];
6550                     }
6551                     src_row -= src_pitch;
6552                     dst_row += dst_pitch;
6553                 }
6554                 return WINED3D_OK;
6555
6556             case WINED3DFMT_DXT3:
6557                 for (y = 0; y < update_h; y += format->block_height)
6558                 {
6559                     struct block
6560                     {
6561                         WORD alpha_row[4];
6562                         WORD color[2];
6563                         BYTE control_row[4];
6564                     };
6565
6566                     const struct block *s = (const struct block *)src_row;
6567                     struct block *d = (struct block *)dst_row;
6568
6569                     for (x = 0; x < row_block_count; ++x)
6570                     {
6571                         d[x].alpha_row[0] = s[x].alpha_row[3];
6572                         d[x].alpha_row[1] = s[x].alpha_row[2];
6573                         d[x].alpha_row[2] = s[x].alpha_row[1];
6574                         d[x].alpha_row[3] = s[x].alpha_row[0];
6575                         d[x].color[0] = s[x].color[0];
6576                         d[x].color[1] = s[x].color[1];
6577                         d[x].control_row[0] = s[x].control_row[3];
6578                         d[x].control_row[1] = s[x].control_row[2];
6579                         d[x].control_row[2] = s[x].control_row[1];
6580                         d[x].control_row[3] = s[x].control_row[0];
6581                     }
6582                     src_row -= src_pitch;
6583                     dst_row += dst_pitch;
6584                 }
6585                 return WINED3D_OK;
6586
6587             default:
6588                 FIXME("Compressed flip not implemented for format %s.\n",
6589                         debug_d3dformat(format->id));
6590                 return E_NOTIMPL;
6591         }
6592     }
6593
6594     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6595             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6596
6597     return E_NOTIMPL;
6598 }
6599
6600 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6601         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6602         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6603 {
6604     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6605     const struct wined3d_format *src_format, *dst_format;
6606     struct wined3d_surface *orig_src = src_surface;
6607     struct wined3d_map_desc dst_map, src_map;
6608     const BYTE *sbase = NULL;
6609     HRESULT hr = WINED3D_OK;
6610     const BYTE *sbuf;
6611     BYTE *dbuf;
6612     int x, y;
6613
6614     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6615             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6616             flags, fx, debug_d3dtexturefiltertype(filter));
6617
6618     if (src_surface == dst_surface)
6619     {
6620         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6621         src_map = dst_map;
6622         src_format = dst_surface->resource.format;
6623         dst_format = src_format;
6624     }
6625     else
6626     {
6627         dst_format = dst_surface->resource.format;
6628         if (src_surface)
6629         {
6630             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6631             {
6632                 src_surface = surface_convert_format(src_surface, dst_format->id);
6633                 if (!src_surface)
6634                 {
6635                     /* The conv function writes a FIXME */
6636                     WARN("Cannot convert source surface format to dest format.\n");
6637                     goto release;
6638                 }
6639             }
6640             wined3d_surface_map(src_surface, &src_map, NULL, WINED3DLOCK_READONLY);
6641             src_format = src_surface->resource.format;
6642         }
6643         else
6644         {
6645             src_format = dst_format;
6646         }
6647
6648         wined3d_surface_map(dst_surface, &dst_map, dst_rect, 0);
6649     }
6650
6651     bpp = dst_surface->resource.format->byte_count;
6652     srcheight = src_rect->bottom - src_rect->top;
6653     srcwidth = src_rect->right - src_rect->left;
6654     dstheight = dst_rect->bottom - dst_rect->top;
6655     dstwidth = dst_rect->right - dst_rect->left;
6656     width = (dst_rect->right - dst_rect->left) * bpp;
6657
6658     if (src_surface)
6659         sbase = (BYTE *)src_map.data
6660                 + ((src_rect->top / src_format->block_height) * src_map.row_pitch)
6661                 + ((src_rect->left / src_format->block_width) * src_format->block_byte_count);
6662     if (src_surface != dst_surface)
6663         dbuf = dst_map.data;
6664     else
6665         dbuf = (BYTE *)dst_map.data
6666                 + ((dst_rect->top / dst_format->block_height) * dst_map.row_pitch)
6667                 + ((dst_rect->left / dst_format->block_width) * dst_format->block_byte_count);
6668
6669     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6670     {
6671         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6672
6673         if (src_surface == dst_surface)
6674         {
6675             FIXME("Only plain blits supported on compressed surfaces.\n");
6676             hr = E_NOTIMPL;
6677             goto release;
6678         }
6679
6680         if (srcheight != dstheight || srcwidth != dstwidth)
6681         {
6682             WARN("Stretching not supported on compressed surfaces.\n");
6683             hr = WINED3DERR_INVALIDCALL;
6684             goto release;
6685         }
6686
6687         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6688         {
6689             WARN("Rectangle not block-aligned.\n");
6690             hr = WINED3DERR_INVALIDCALL;
6691             goto release;
6692         }
6693
6694         hr = surface_cpu_blt_compressed(sbase, dbuf,
6695                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6696                 src_format, flags, fx);
6697         goto release;
6698     }
6699
6700     /* First, all the 'source-less' blits */
6701     if (flags & WINEDDBLT_COLORFILL)
6702     {
6703         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6704         flags &= ~WINEDDBLT_COLORFILL;
6705     }
6706
6707     if (flags & WINEDDBLT_DEPTHFILL)
6708     {
6709         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6710     }
6711     if (flags & WINEDDBLT_ROP)
6712     {
6713         /* Catch some degenerate cases here. */
6714         switch (fx->dwROP)
6715         {
6716             case BLACKNESS:
6717                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6718                 break;
6719             case 0xAA0029: /* No-op */
6720                 break;
6721             case WHITENESS:
6722                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6723                 break;
6724             case SRCCOPY: /* Well, we do that below? */
6725                 break;
6726             default:
6727                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6728                 goto error;
6729         }
6730         flags &= ~WINEDDBLT_ROP;
6731     }
6732     if (flags & WINEDDBLT_DDROPS)
6733     {
6734         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6735     }
6736     /* Now the 'with source' blits. */
6737     if (src_surface)
6738     {
6739         int sx, xinc, sy, yinc;
6740
6741         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6742             goto release;
6743
6744         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6745                 && (srcwidth != dstwidth || srcheight != dstheight))
6746         {
6747             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6748             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6749         }
6750
6751         xinc = (srcwidth << 16) / dstwidth;
6752         yinc = (srcheight << 16) / dstheight;
6753
6754         if (!flags)
6755         {
6756             /* No effects, we can cheat here. */
6757             if (dstwidth == srcwidth)
6758             {
6759                 if (dstheight == srcheight)
6760                 {
6761                     /* No stretching in either direction. This needs to be as
6762                      * fast as possible. */
6763                     sbuf = sbase;
6764
6765                     /* Check for overlapping surfaces. */
6766                     if (src_surface != dst_surface || dst_rect->top < src_rect->top
6767                             || dst_rect->right <= src_rect->left || src_rect->right <= dst_rect->left)
6768                     {
6769                         /* No overlap, or dst above src, so copy from top downwards. */
6770                         for (y = 0; y < dstheight; ++y)
6771                         {
6772                             memcpy(dbuf, sbuf, width);
6773                             sbuf += src_map.row_pitch;
6774                             dbuf += dst_map.row_pitch;
6775                         }
6776                     }
6777                     else if (dst_rect->top > src_rect->top)
6778                     {
6779                         /* Copy from bottom upwards. */
6780                         sbuf += src_map.row_pitch * dstheight;
6781                         dbuf += dst_map.row_pitch * dstheight;
6782                         for (y = 0; y < dstheight; ++y)
6783                         {
6784                             sbuf -= src_map.row_pitch;
6785                             dbuf -= dst_map.row_pitch;
6786                             memcpy(dbuf, sbuf, width);
6787                         }
6788                     }
6789                     else
6790                     {
6791                         /* Src and dst overlapping on the same line, use memmove. */
6792                         for (y = 0; y < dstheight; ++y)
6793                         {
6794                             memmove(dbuf, sbuf, width);
6795                             sbuf += src_map.row_pitch;
6796                             dbuf += dst_map.row_pitch;
6797                         }
6798                     }
6799                 }
6800                 else
6801                 {
6802                     /* Stretching in y direction only. */
6803                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6804                     {
6805                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6806                         memcpy(dbuf, sbuf, width);
6807                         dbuf += dst_map.row_pitch;
6808                     }
6809                 }
6810             }
6811             else
6812             {
6813                 /* Stretching in X direction. */
6814                 int last_sy = -1;
6815                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6816                 {
6817                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6818
6819                     if ((sy >> 16) == (last_sy >> 16))
6820                     {
6821                         /* This source row is the same as last source row -
6822                          * Copy the already stretched row. */
6823                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6824                     }
6825                     else
6826                     {
6827 #define STRETCH_ROW(type) \
6828 do { \
6829     const type *s = (const type *)sbuf; \
6830     type *d = (type *)dbuf; \
6831     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6832         d[x] = s[sx >> 16]; \
6833 } while(0)
6834
6835                         switch(bpp)
6836                         {
6837                             case 1:
6838                                 STRETCH_ROW(BYTE);
6839                                 break;
6840                             case 2:
6841                                 STRETCH_ROW(WORD);
6842                                 break;
6843                             case 4:
6844                                 STRETCH_ROW(DWORD);
6845                                 break;
6846                             case 3:
6847                             {
6848                                 const BYTE *s;
6849                                 BYTE *d = dbuf;
6850                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6851                                 {
6852                                     DWORD pixel;
6853
6854                                     s = sbuf + 3 * (sx >> 16);
6855                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6856                                     d[0] = (pixel      ) & 0xff;
6857                                     d[1] = (pixel >>  8) & 0xff;
6858                                     d[2] = (pixel >> 16) & 0xff;
6859                                     d += 3;
6860                                 }
6861                                 break;
6862                             }
6863                             default:
6864                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6865                                 hr = WINED3DERR_NOTAVAILABLE;
6866                                 goto error;
6867                         }
6868 #undef STRETCH_ROW
6869                     }
6870                     dbuf += dst_map.row_pitch;
6871                     last_sy = sy;
6872                 }
6873             }
6874         }
6875         else
6876         {
6877             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6878             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6879             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6880             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6881             {
6882                 /* The color keying flags are checked for correctness in ddraw */
6883                 if (flags & WINEDDBLT_KEYSRC)
6884                 {
6885                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6886                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6887                 }
6888                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6889                 {
6890                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6891                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6892                 }
6893
6894                 if (flags & WINEDDBLT_KEYDEST)
6895                 {
6896                     /* Destination color keys are taken from the source surface! */
6897                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6898                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6899                 }
6900                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6901                 {
6902                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6903                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6904                 }
6905
6906                 if (bpp == 1)
6907                 {
6908                     keymask = 0xff;
6909                 }
6910                 else
6911                 {
6912                     keymask = src_format->red_mask
6913                             | src_format->green_mask
6914                             | src_format->blue_mask;
6915                 }
6916                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6917             }
6918
6919             if (flags & WINEDDBLT_DDFX)
6920             {
6921                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6922                 LONG tmpxy;
6923                 dTopLeft     = dbuf;
6924                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6925                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6926                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6927
6928                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6929                 {
6930                     /* I don't think we need to do anything about this flag */
6931                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6932                 }
6933                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6934                 {
6935                     tmp          = dTopRight;
6936                     dTopRight    = dTopLeft;
6937                     dTopLeft     = tmp;
6938                     tmp          = dBottomRight;
6939                     dBottomRight = dBottomLeft;
6940                     dBottomLeft  = tmp;
6941                     dstxinc = dstxinc * -1;
6942                 }
6943                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6944                 {
6945                     tmp          = dTopLeft;
6946                     dTopLeft     = dBottomLeft;
6947                     dBottomLeft  = tmp;
6948                     tmp          = dTopRight;
6949                     dTopRight    = dBottomRight;
6950                     dBottomRight = tmp;
6951                     dstyinc = dstyinc * -1;
6952                 }
6953                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6954                 {
6955                     /* I don't think we need to do anything about this flag */
6956                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6957                 }
6958                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6959                 {
6960                     tmp          = dBottomRight;
6961                     dBottomRight = dTopLeft;
6962                     dTopLeft     = tmp;
6963                     tmp          = dBottomLeft;
6964                     dBottomLeft  = dTopRight;
6965                     dTopRight    = tmp;
6966                     dstxinc = dstxinc * -1;
6967                     dstyinc = dstyinc * -1;
6968                 }
6969                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6970                 {
6971                     tmp          = dTopLeft;
6972                     dTopLeft     = dBottomLeft;
6973                     dBottomLeft  = dBottomRight;
6974                     dBottomRight = dTopRight;
6975                     dTopRight    = tmp;
6976                     tmpxy   = dstxinc;
6977                     dstxinc = dstyinc;
6978                     dstyinc = tmpxy;
6979                     dstxinc = dstxinc * -1;
6980                 }
6981                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6982                 {
6983                     tmp          = dTopLeft;
6984                     dTopLeft     = dTopRight;
6985                     dTopRight    = dBottomRight;
6986                     dBottomRight = dBottomLeft;
6987                     dBottomLeft  = tmp;
6988                     tmpxy   = dstxinc;
6989                     dstxinc = dstyinc;
6990                     dstyinc = tmpxy;
6991                     dstyinc = dstyinc * -1;
6992                 }
6993                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6994                 {
6995                     /* I don't think we need to do anything about this flag */
6996                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6997                 }
6998                 dbuf = dTopLeft;
6999                 flags &= ~(WINEDDBLT_DDFX);
7000             }
7001
7002 #define COPY_COLORKEY_FX(type) \
7003 do { \
7004     const type *s; \
7005     type *d = (type *)dbuf, *dx, tmp; \
7006     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7007     { \
7008         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7009         dx = d; \
7010         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7011         { \
7012             tmp = s[sx >> 16]; \
7013             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7014                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7015             { \
7016                 dx[0] = tmp; \
7017             } \
7018             dx = (type *)(((BYTE *)dx) + dstxinc); \
7019         } \
7020         d = (type *)(((BYTE *)d) + dstyinc); \
7021     } \
7022 } while(0)
7023
7024             switch (bpp)
7025             {
7026                 case 1:
7027                     COPY_COLORKEY_FX(BYTE);
7028                     break;
7029                 case 2:
7030                     COPY_COLORKEY_FX(WORD);
7031                     break;
7032                 case 4:
7033                     COPY_COLORKEY_FX(DWORD);
7034                     break;
7035                 case 3:
7036                 {
7037                     const BYTE *s;
7038                     BYTE *d = dbuf, *dx;
7039                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7040                     {
7041                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7042                         dx = d;
7043                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7044                         {
7045                             DWORD pixel, dpixel = 0;
7046                             s = sbuf + 3 * (sx>>16);
7047                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7048                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7049                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7050                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7051                             {
7052                                 dx[0] = (pixel      ) & 0xff;
7053                                 dx[1] = (pixel >>  8) & 0xff;
7054                                 dx[2] = (pixel >> 16) & 0xff;
7055                             }
7056                             dx += dstxinc;
7057                         }
7058                         d += dstyinc;
7059                     }
7060                     break;
7061                 }
7062                 default:
7063                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7064                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7065                     hr = WINED3DERR_NOTAVAILABLE;
7066                     goto error;
7067 #undef COPY_COLORKEY_FX
7068             }
7069         }
7070     }
7071
7072 error:
7073     if (flags && FIXME_ON(d3d_surface))
7074     {
7075         FIXME("\tUnsupported flags: %#x.\n", flags);
7076     }
7077
7078 release:
7079     wined3d_surface_unmap(dst_surface);
7080     if (src_surface && src_surface != dst_surface)
7081         wined3d_surface_unmap(src_surface);
7082     /* Release the converted surface, if any. */
7083     if (src_surface && src_surface != orig_src)
7084         wined3d_surface_decref(src_surface);
7085
7086     return hr;
7087 }
7088
7089 /* Do not call while under the GL lock. */
7090 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7091         const RECT *dst_rect, const struct wined3d_color *color)
7092 {
7093     static const RECT src_rect;
7094     WINEDDBLTFX BltFx;
7095
7096     memset(&BltFx, 0, sizeof(BltFx));
7097     BltFx.dwSize = sizeof(BltFx);
7098     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7099     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7100             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7101 }
7102
7103 /* Do not call while under the GL lock. */
7104 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7105         struct wined3d_surface *surface, const RECT *rect, float depth)
7106 {
7107     FIXME("Depth filling not implemented by cpu_blit.\n");
7108     return WINED3DERR_INVALIDCALL;
7109 }
7110
7111 const struct blit_shader cpu_blit =  {
7112     cpu_blit_alloc,
7113     cpu_blit_free,
7114     cpu_blit_set,
7115     cpu_blit_unset,
7116     cpu_blit_supported,
7117     cpu_blit_color_fill,
7118     cpu_blit_depth_fill,
7119 };
7120
7121 static HRESULT surface_init(struct wined3d_surface *surface, enum wined3d_surface_type surface_type, UINT alignment,
7122         UINT width, UINT height, UINT level, enum wined3d_multisample_type multisample_type,
7123         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7124         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7125 {
7126     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7127     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7128     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7129     unsigned int resource_size;
7130     HRESULT hr;
7131
7132     if (multisample_quality > 0)
7133     {
7134         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7135         multisample_quality = 0;
7136     }
7137
7138     /* Quick lockable sanity check.
7139      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7140      * this function is too deep to need to care about things like this.
7141      * Levels need to be checked too, since they all affect what can be done. */
7142     switch (pool)
7143     {
7144         case WINED3D_POOL_SCRATCH:
7145             if (!lockable)
7146             {
7147                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7148                         "which are mutually exclusive, setting lockable to TRUE.\n");
7149                 lockable = TRUE;
7150             }
7151             break;
7152
7153         case WINED3D_POOL_SYSTEM_MEM:
7154             if (!lockable)
7155                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7156             break;
7157
7158         case WINED3D_POOL_MANAGED:
7159             if (usage & WINED3DUSAGE_DYNAMIC)
7160                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7161             break;
7162
7163         case WINED3D_POOL_DEFAULT:
7164             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7165                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7166             break;
7167
7168         default:
7169             FIXME("Unknown pool %#x.\n", pool);
7170             break;
7171     };
7172
7173     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7174         FIXME("Trying to create a render target that isn't in the default pool.\n");
7175
7176     /* FIXME: Check that the format is supported by the device. */
7177
7178     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7179     if (!resource_size)
7180         return WINED3DERR_INVALIDCALL;
7181
7182     surface->surface_type = surface_type;
7183
7184     switch (surface_type)
7185     {
7186         case WINED3D_SURFACE_TYPE_OPENGL:
7187             surface->surface_ops = &surface_ops;
7188             break;
7189
7190         case WINED3D_SURFACE_TYPE_GDI:
7191             surface->surface_ops = &gdi_surface_ops;
7192             break;
7193
7194         default:
7195             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7196             return WINED3DERR_INVALIDCALL;
7197     }
7198
7199     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7200             multisample_type, multisample_quality, usage, pool, width, height, 1,
7201             resource_size, parent, parent_ops, &surface_resource_ops);
7202     if (FAILED(hr))
7203     {
7204         WARN("Failed to initialize resource, returning %#x.\n", hr);
7205         return hr;
7206     }
7207
7208     /* "Standalone" surface. */
7209     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7210
7211     surface->texture_level = level;
7212     list_init(&surface->overlays);
7213
7214     /* Flags */
7215     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7216     if (flags & WINED3D_SURFACE_DISCARD)
7217         surface->flags |= SFLAG_DISCARD;
7218     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7219         surface->flags |= SFLAG_PIN_SYSMEM;
7220     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7221         surface->flags |= SFLAG_LOCKABLE;
7222     /* I'm not sure if this qualifies as a hack or as an optimization. It
7223      * seems reasonable to assume that lockable render targets will get
7224      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7225      * creation. However, the other reason we want to do this is that several
7226      * ddraw applications access surface memory while the surface isn't
7227      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7228      * future locks prevents these from crashing. */
7229     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7230         surface->flags |= SFLAG_DYNLOCK;
7231
7232     /* Mark the texture as dirty so that it gets loaded first time around. */
7233     surface_add_dirty_rect(surface, NULL);
7234     list_init(&surface->renderbuffers);
7235
7236     TRACE("surface %p, memory %p, size %u\n",
7237             surface, surface->resource.allocatedMemory, surface->resource.size);
7238
7239     /* Call the private setup routine */
7240     hr = surface->surface_ops->surface_private_setup(surface);
7241     if (FAILED(hr))
7242     {
7243         ERR("Private setup failed, returning %#x\n", hr);
7244         surface_cleanup(surface);
7245         return hr;
7246     }
7247
7248     /* Similar to lockable rendertargets above, creating the DIB section
7249      * during surface initialization prevents the sysmem pointer from changing
7250      * after a wined3d_surface_getdc() call. */
7251     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7252             && SUCCEEDED(surface_create_dib_section(surface)))
7253     {
7254         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7255         surface->resource.heapMemory = NULL;
7256         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7257     }
7258
7259     return hr;
7260 }
7261
7262 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7263         enum wined3d_format_id format_id, UINT level, DWORD usage, enum wined3d_pool pool,
7264         enum wined3d_multisample_type multisample_type, DWORD multisample_quality,
7265         enum wined3d_surface_type surface_type, DWORD flags, void *parent,
7266         const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7267 {
7268     struct wined3d_surface *object;
7269     HRESULT hr;
7270
7271     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7272             device, width, height, debug_d3dformat(format_id), level);
7273     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7274             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7275     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7276
7277     if (surface_type == WINED3D_SURFACE_TYPE_OPENGL && !device->adapter)
7278     {
7279         ERR("OpenGL surfaces are not available without OpenGL.\n");
7280         return WINED3DERR_NOTAVAILABLE;
7281     }
7282
7283     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7284     if (!object)
7285     {
7286         ERR("Failed to allocate surface memory.\n");
7287         return WINED3DERR_OUTOFVIDEOMEMORY;
7288     }
7289
7290     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7291             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7292     if (FAILED(hr))
7293     {
7294         WARN("Failed to initialize surface, returning %#x.\n", hr);
7295         HeapFree(GetProcessHeap(), 0, object);
7296         return hr;
7297     }
7298
7299     TRACE("Created surface %p.\n", object);
7300     *surface = object;
7301
7302     return hr;
7303 }