shell32: Handle ASSOCDATA_EDITFLAGS in IQueryAssociations:GetData function.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
/* Forward declarations of two file-local (static) blit routines, so that
 * code earlier in the file can refer to them. Both take a destination
 * surface and rectangle, a source surface and rectangle, blit flags,
 * optional effects in fx and a texture filter, and return an HRESULT. */
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         enum wined3d_texture_filter_type filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
164 struct blt_info
165 {
166     GLenum binding;
167     GLenum bind_target;
168     enum tex_types tex_type;
169     GLfloat coords[4][3];
170 };
171
/* Floating point rectangle: left, top, right and bottom edges, in that
 * order. cube_coords_float() stores edges mapped into [-1.0, 1.0] here. */
struct float_rect
{
    float l, t, r, b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380     UINT usage;
381     HDC dc;
382
383     TRACE("surface %p.\n", surface);
384
385     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
386     {
387         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
388         return WINED3DERR_INVALIDCALL;
389     }
390
391     switch (format->byte_count)
392     {
393         case 2:
394         case 4:
395             /* Allocate extra space to store the RGB bit masks. */
396             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
397             break;
398
399         case 3:
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
401             break;
402
403         default:
404             /* Allocate extra space for a palette. */
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
406                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
407             break;
408     }
409
410     if (!b_info)
411         return E_OUTOFMEMORY;
412
413     /* Some applications access the surface in via DWORDs, and do not take
414      * the necessary care at the end of the surface. So we need at least
415      * 4 extra bytes at the end of the surface. Check against the page size,
416      * if the last page used for the surface has at least 4 spare bytes we're
417      * safe, otherwise add an extra line to the DIB section. */
418     GetSystemInfo(&sysInfo);
419     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
420     {
421         extraline = 1;
422         TRACE("Adding an extra line to the DIB section.\n");
423     }
424
425     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
426     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
427     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
428     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
429     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
430             * wined3d_surface_get_pitch(surface);
431     b_info->bmiHeader.biPlanes = 1;
432     b_info->bmiHeader.biBitCount = format->byte_count * 8;
433
434     b_info->bmiHeader.biXPelsPerMeter = 0;
435     b_info->bmiHeader.biYPelsPerMeter = 0;
436     b_info->bmiHeader.biClrUsed = 0;
437     b_info->bmiHeader.biClrImportant = 0;
438
439     /* Get the bit masks */
440     masks = (DWORD *)b_info->bmiColors;
441     switch (surface->resource.format->id)
442     {
443         case WINED3DFMT_B8G8R8_UNORM:
444             usage = DIB_RGB_COLORS;
445             b_info->bmiHeader.biCompression = BI_RGB;
446             break;
447
448         case WINED3DFMT_B5G5R5X1_UNORM:
449         case WINED3DFMT_B5G5R5A1_UNORM:
450         case WINED3DFMT_B4G4R4A4_UNORM:
451         case WINED3DFMT_B4G4R4X4_UNORM:
452         case WINED3DFMT_B2G3R3_UNORM:
453         case WINED3DFMT_B2G3R3A8_UNORM:
454         case WINED3DFMT_R10G10B10A2_UNORM:
455         case WINED3DFMT_R8G8B8A8_UNORM:
456         case WINED3DFMT_R8G8B8X8_UNORM:
457         case WINED3DFMT_B10G10R10A2_UNORM:
458         case WINED3DFMT_B5G6R5_UNORM:
459         case WINED3DFMT_R16G16B16A16_UNORM:
460             usage = 0;
461             b_info->bmiHeader.biCompression = BI_BITFIELDS;
462             masks[0] = format->red_mask;
463             masks[1] = format->green_mask;
464             masks[2] = format->blue_mask;
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             usage = 0;
471             break;
472     }
473
474     if (!(dc = GetDC(0)))
475     {
476         HeapFree(GetProcessHeap(), 0, b_info);
477         return HRESULT_FROM_WIN32(GetLastError());
478     }
479
480     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
481             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
482             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
483     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
484     ReleaseDC(0, dc);
485
486     if (!surface->dib.DIBsection)
487     {
488         ERR("Failed to create DIB section.\n");
489         HeapFree(GetProcessHeap(), 0, b_info);
490         return HRESULT_FROM_WIN32(GetLastError());
491     }
492
493     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
494     /* Copy the existing surface to the dib section. */
495     if (surface->resource.allocatedMemory)
496     {
497         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
498                 surface->resource.height * wined3d_surface_get_pitch(surface));
499     }
500     else
501     {
502         /* This is to make maps read the GL texture although memory is allocated. */
503         surface->flags &= ~SFLAG_INSYSMEM;
504     }
505     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
506
507     HeapFree(GetProcessHeap(), 0, b_info);
508
509     /* Now allocate a DC. */
510     surface->hDC = CreateCompatibleDC(0);
511     SelectObject(surface->hDC, surface->dib.DIBsection);
512     TRACE("Using wined3d palette %p.\n", surface->palette);
513     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
514
515     surface->flags |= SFLAG_DIBSECTION;
516
517     return WINED3D_OK;
518 }
519
520 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
521 {
522     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
523         return FALSE;
524     if (!(surface->flags & SFLAG_DYNLOCK))
525         return FALSE;
526     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
527         return FALSE;
528     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
529         return FALSE;
530
531     return TRUE;
532 }
533
534 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
535 {
536     struct wined3d_context *context;
537     GLenum error;
538
539     context = context_acquire(surface->resource.device, NULL);
540     ENTER_GL();
541
542     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
543     error = glGetError();
544     if (!surface->pbo || error != GL_NO_ERROR)
545         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
546
547     TRACE("Binding PBO %u.\n", surface->pbo);
548
549     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
550     checkGLcall("glBindBufferARB");
551
552     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
553             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
554     checkGLcall("glBufferDataARB");
555
556     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
557     checkGLcall("glBindBufferARB");
558
559     /* We don't need the system memory anymore and we can't even use it for PBOs. */
560     if (!(surface->flags & SFLAG_CLIENT))
561     {
562         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
563         surface->resource.heapMemory = NULL;
564     }
565     surface->resource.allocatedMemory = NULL;
566     surface->flags |= SFLAG_PBO;
567     LEAVE_GL();
568     context_release(context);
569 }
570
571 static void surface_prepare_system_memory(struct wined3d_surface *surface)
572 {
573     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
574
575     TRACE("surface %p.\n", surface);
576
577     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
578         surface_load_pbo(surface, gl_info);
579     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
580     {
581         /* Whatever surface we have, make sure that there is memory allocated
582          * for the downloaded copy, or a PBO to map. */
583         if (!surface->resource.heapMemory)
584             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
585
586         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
587                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
588
589         if (surface->flags & SFLAG_INSYSMEM)
590             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
591     }
592 }
593
594 static void surface_evict_sysmem(struct wined3d_surface *surface)
595 {
596     if (surface->flags & SFLAG_DONOTFREE)
597         return;
598
599     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
600     surface->resource.allocatedMemory = NULL;
601     surface->resource.heapMemory = NULL;
602     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
603 }
604
605 /* Context activation is done by the caller. */
606 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
607         struct wined3d_context *context, BOOL srgb)
608 {
609     struct wined3d_device *device = surface->resource.device;
610     DWORD active_sampler;
611
612     /* We don't need a specific texture unit, but after binding the texture
613      * the current unit is dirty. Read the unit back instead of switching to
614      * 0, this avoids messing around with the state manager's GL states. The
615      * current texture unit should always be a valid one.
616      *
617      * To be more specific, this is tricky because we can implicitly be
618      * called from sampler() in state.c. This means we can't touch anything
619      * other than whatever happens to be the currently active texture, or we
620      * would risk marking already applied sampler states dirty again. */
621     active_sampler = device->rev_tex_unit_map[context->active_texture];
622
623     if (active_sampler != WINED3D_UNMAPPED_STAGE)
624         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
625     surface_bind(surface, context, srgb);
626 }
627
628 static void surface_force_reload(struct wined3d_surface *surface)
629 {
630     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
631 }
632
633 static void surface_release_client_storage(struct wined3d_surface *surface)
634 {
635     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
636
637     ENTER_GL();
638     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
639     if (surface->texture_name)
640     {
641         surface_bind_and_dirtify(surface, context, FALSE);
642         glTexImage2D(surface->texture_target, surface->texture_level,
643                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
644     }
645     if (surface->texture_name_srgb)
646     {
647         surface_bind_and_dirtify(surface, context, TRUE);
648         glTexImage2D(surface->texture_target, surface->texture_level,
649                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
650     }
651     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
652     LEAVE_GL();
653
654     context_release(context);
655
656     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
657     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
658     surface_force_reload(surface);
659 }
660
661 static HRESULT surface_private_setup(struct wined3d_surface *surface)
662 {
663     /* TODO: Check against the maximum texture sizes supported by the video card. */
664     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
665     unsigned int pow2Width, pow2Height;
666
667     TRACE("surface %p.\n", surface);
668
669     surface->texture_name = 0;
670     surface->texture_target = GL_TEXTURE_2D;
671
672     /* Non-power2 support */
673     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
674     {
675         pow2Width = surface->resource.width;
676         pow2Height = surface->resource.height;
677     }
678     else
679     {
680         /* Find the nearest pow2 match */
681         pow2Width = pow2Height = 1;
682         while (pow2Width < surface->resource.width)
683             pow2Width <<= 1;
684         while (pow2Height < surface->resource.height)
685             pow2Height <<= 1;
686     }
687     surface->pow2Width = pow2Width;
688     surface->pow2Height = pow2Height;
689
690     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
691     {
692         /* TODO: Add support for non power two compressed textures. */
693         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
694         {
695             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
696                   surface, surface->resource.width, surface->resource.height);
697             return WINED3DERR_NOTAVAILABLE;
698         }
699     }
700
701     if (pow2Width != surface->resource.width
702             || pow2Height != surface->resource.height)
703     {
704         surface->flags |= SFLAG_NONPOW2;
705     }
706
707     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
708             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
709     {
710         /* One of three options:
711          * 1: Do the same as we do with NPOT and scale the texture, (any
712          *    texture ops would require the texture to be scaled which is
713          *    potentially slow)
714          * 2: Set the texture to the maximum size (bad idea).
715          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
716          * 4: Create the surface, but allow it to be used only for DirectDraw
717          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
718          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
719          *    the render target. */
720         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
721         {
722             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
723             return WINED3DERR_NOTAVAILABLE;
724         }
725
726         /* We should never use this surface in combination with OpenGL! */
727         TRACE("Creating an oversized surface: %ux%u.\n",
728                 surface->pow2Width, surface->pow2Height);
729     }
730     else
731     {
732         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
733          * and EXT_PALETTED_TEXTURE is used in combination with texture
734          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
735          * EXT_PALETTED_TEXTURE doesn't work in combination with
736          * ARB_TEXTURE_RECTANGLE. */
737         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
738                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
739                 && gl_info->supported[EXT_PALETTED_TEXTURE]
740                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
741         {
742             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
743             surface->pow2Width = surface->resource.width;
744             surface->pow2Height = surface->resource.height;
745             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
746         }
747     }
748
749     switch (wined3d_settings.offscreen_rendering_mode)
750     {
751         case ORM_FBO:
752             surface->get_drawable_size = get_drawable_size_fbo;
753             break;
754
755         case ORM_BACKBUFFER:
756             surface->get_drawable_size = get_drawable_size_backbuffer;
757             break;
758
759         default:
760             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
761             return WINED3DERR_INVALIDCALL;
762     }
763
764     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
765         surface->flags |= SFLAG_DISCARDED;
766
767     return WINED3D_OK;
768 }
769
770 static void surface_realize_palette(struct wined3d_surface *surface)
771 {
772     struct wined3d_palette *palette = surface->palette;
773
774     TRACE("surface %p.\n", surface);
775
776     if (!palette) return;
777
778     if (surface->resource.format->id == WINED3DFMT_P8_UINT
779             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
780     {
781         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
782         {
783             /* Make sure the texture is up to date. This call doesn't do
784              * anything if the texture is already up to date. */
785             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
786
787             /* We want to force a palette refresh, so mark the drawable as not being up to date */
788             if (!surface_is_offscreen(surface))
789                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
790         }
791         else
792         {
793             if (!(surface->flags & SFLAG_INSYSMEM))
794             {
795                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
796                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
797             }
798             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
799         }
800     }
801
802     if (surface->flags & SFLAG_DIBSECTION)
803     {
804         RGBQUAD col[256];
805         unsigned int i;
806
807         TRACE("Updating the DC's palette.\n");
808
809         for (i = 0; i < 256; ++i)
810         {
811             col[i].rgbRed   = palette->palents[i].peRed;
812             col[i].rgbGreen = palette->palents[i].peGreen;
813             col[i].rgbBlue  = palette->palents[i].peBlue;
814             col[i].rgbReserved = 0;
815         }
816         SetDIBColorTable(surface->hDC, 0, 256, col);
817     }
818
819     /* Propagate the changes to the drawable when we have a palette. */
820     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
821         surface_load_location(surface, surface->draw_binding, NULL);
822 }
823
824 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
825 {
826     HRESULT hr;
827
828     /* If there's no destination surface there is nothing to do. */
829     if (!surface->overlay_dest)
830         return WINED3D_OK;
831
832     /* Blt calls ModifyLocation on the dest surface, which in turn calls
833      * DrawOverlay to update the overlay. Prevent an endless recursion. */
834     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
835         return WINED3D_OK;
836
837     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
838     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
839             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
840     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
841
842     return hr;
843 }
844
845 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
846 {
847     struct wined3d_device *device = surface->resource.device;
848     const RECT *pass_rect = rect;
849
850     TRACE("surface %p, rect %s, flags %#x.\n",
851             surface, wine_dbgstr_rect(rect), flags);
852
853     if (flags & WINED3DLOCK_DISCARD)
854     {
855         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
856         surface_prepare_system_memory(surface);
857         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
858     }
859     else
860     {
861         /* surface_load_location() does not check if the rectangle specifies
862          * the full surface. Most callers don't need that, so do it here. */
863         if (rect && !rect->top && !rect->left
864                 && rect->right == surface->resource.width
865                 && rect->bottom == surface->resource.height)
866             pass_rect = NULL;
867         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
868     }
869
870     if (surface->flags & SFLAG_PBO)
871     {
872         const struct wined3d_gl_info *gl_info;
873         struct wined3d_context *context;
874
875         context = context_acquire(device, NULL);
876         gl_info = context->gl_info;
877
878         ENTER_GL();
879         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
880         checkGLcall("glBindBufferARB");
881
882         /* This shouldn't happen but could occur if some other function
883          * didn't handle the PBO properly. */
884         if (surface->resource.allocatedMemory)
885             ERR("The surface already has PBO memory allocated.\n");
886
887         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
888         checkGLcall("glMapBufferARB");
889
890         /* Make sure the PBO isn't set anymore in order not to break non-PBO
891          * calls. */
892         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
893         checkGLcall("glBindBufferARB");
894
895         LEAVE_GL();
896         context_release(context);
897     }
898
899     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
900     {
901         if (!rect)
902             surface_add_dirty_rect(surface, NULL);
903         else
904         {
905             struct wined3d_box b;
906
907             b.left = rect->left;
908             b.top = rect->top;
909             b.right = rect->right;
910             b.bottom = rect->bottom;
911             b.front = 0;
912             b.back = 1;
913             surface_add_dirty_rect(surface, &b);
914         }
915     }
916 }
917
918 static void surface_unmap(struct wined3d_surface *surface)
919 {
920     struct wined3d_device *device = surface->resource.device;
921     BOOL fullsurface;
922
923     TRACE("surface %p.\n", surface);
924
925     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
926
927     if (surface->flags & SFLAG_PBO)
928     {
929         const struct wined3d_gl_info *gl_info;
930         struct wined3d_context *context;
931
932         TRACE("Freeing PBO memory.\n");
933
934         context = context_acquire(device, NULL);
935         gl_info = context->gl_info;
936
937         ENTER_GL();
938         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
939         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
940         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
941         checkGLcall("glUnmapBufferARB");
942         LEAVE_GL();
943         context_release(context);
944
945         surface->resource.allocatedMemory = NULL;
946     }
947
948     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
949
950     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
951     {
952         TRACE("Not dirtified, nothing to do.\n");
953         goto done;
954     }
955
956     /* FIXME: The ORM_BACKBUFFER case probably isn't needed, but who knows
957      * what obscure bugs in backbuffer ORM removing it will uncover. Also,
958      * this should only be needed for the frontbuffer, but that requires
959      * present calls to call surface_load_location() on the backbuffer.
960      * Fix both of those after 1.4. */
961     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
962             || (wined3d_settings.offscreen_rendering_mode == ORM_BACKBUFFER
963             && device->fb.render_targets && surface == device->fb.render_targets[0]))
964     {
965         if (!surface->dirtyRect.left && !surface->dirtyRect.top
966                 && surface->dirtyRect.right == surface->resource.width
967                 && surface->dirtyRect.bottom == surface->resource.height)
968         {
969             fullsurface = TRUE;
970         }
971         else
972         {
973             /* TODO: Proper partial rectangle tracking. */
974             fullsurface = FALSE;
975             surface->flags |= SFLAG_INSYSMEM;
976         }
977
978         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
979
980         /* Partial rectangle tracking is not commonly implemented, it is only
981          * done for render targets. INSYSMEM was set before to tell
982          * surface_load_location() where to read the rectangle from.
983          * Indrawable is set because all modifications from the partial
984          * sysmem copy are written back to the drawable, thus the surface is
985          * merged again in the drawable. The sysmem copy is not fully up to
986          * date because only a subrectangle was read in Map(). */
987         if (!fullsurface)
988         {
989             surface_modify_location(surface, surface->draw_binding, TRUE);
990             surface_evict_sysmem(surface);
991         }
992
993         surface->dirtyRect.left = surface->resource.width;
994         surface->dirtyRect.top = surface->resource.height;
995         surface->dirtyRect.right = 0;
996         surface->dirtyRect.bottom = 0;
997     }
998     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
999     {
1000         FIXME("Depth / stencil buffer locking is not implemented.\n");
1001     }
1002
1003 done:
1004     /* Overlays have to be redrawn manually after changes with the GL implementation */
1005     if (surface->overlay_dest)
1006         surface_draw_overlay(surface);
1007 }
1008
1009 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1010 {
1011     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1012         return FALSE;
1013     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1014         return FALSE;
1015     return TRUE;
1016 }
1017
1018 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1019         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1020 {
1021     const struct wined3d_gl_info *gl_info;
1022     struct wined3d_context *context;
1023     DWORD src_mask, dst_mask;
1024     GLbitfield gl_mask;
1025
1026     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1027             device, src_surface, wine_dbgstr_rect(src_rect),
1028             dst_surface, wine_dbgstr_rect(dst_rect));
1029
1030     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1031     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1032
1033     if (src_mask != dst_mask)
1034     {
1035         ERR("Incompatible formats %s and %s.\n",
1036                 debug_d3dformat(src_surface->resource.format->id),
1037                 debug_d3dformat(dst_surface->resource.format->id));
1038         return;
1039     }
1040
1041     if (!src_mask)
1042     {
1043         ERR("Not a depth / stencil format: %s.\n",
1044                 debug_d3dformat(src_surface->resource.format->id));
1045         return;
1046     }
1047
1048     gl_mask = 0;
1049     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1050         gl_mask |= GL_DEPTH_BUFFER_BIT;
1051     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1052         gl_mask |= GL_STENCIL_BUFFER_BIT;
1053
1054     /* Make sure the locations are up-to-date. Loading the destination
1055      * surface isn't required if the entire surface is overwritten. */
1056     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1057     if (!surface_is_full_rect(dst_surface, dst_rect))
1058         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1059
1060     context = context_acquire(device, NULL);
1061     if (!context->valid)
1062     {
1063         context_release(context);
1064         WARN("Invalid context, skipping blit.\n");
1065         return;
1066     }
1067
1068     gl_info = context->gl_info;
1069
1070     ENTER_GL();
1071
1072     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1073     glReadBuffer(GL_NONE);
1074     checkGLcall("glReadBuffer()");
1075     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1076
1077     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1078     context_set_draw_buffer(context, GL_NONE);
1079     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1080     context_invalidate_state(context, STATE_FRAMEBUFFER);
1081
1082     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1083     {
1084         glDepthMask(GL_TRUE);
1085         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1086     }
1087     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1088     {
1089         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1090         {
1091             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1092             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1093         }
1094         glStencilMask(~0U);
1095         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1096     }
1097
1098     glDisable(GL_SCISSOR_TEST);
1099     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1100
1101     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1102             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1103     checkGLcall("glBlitFramebuffer()");
1104
1105     LEAVE_GL();
1106
1107     if (wined3d_settings.strict_draw_ordering)
1108         wglFlush(); /* Flush to ensure ordering across contexts. */
1109
1110     context_release(context);
1111 }
1112
1113 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1114  * Depth / stencil is not supported. */
1115 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1116         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1117         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1118 {
1119     const struct wined3d_gl_info *gl_info;
1120     struct wined3d_context *context;
1121     RECT src_rect, dst_rect;
1122     GLenum gl_filter;
1123     GLenum buffer;
1124
1125     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1126     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1127             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1128     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1129             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1130
1131     src_rect = *src_rect_in;
1132     dst_rect = *dst_rect_in;
1133
1134     switch (filter)
1135     {
1136         case WINED3D_TEXF_LINEAR:
1137             gl_filter = GL_LINEAR;
1138             break;
1139
1140         default:
1141             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1142         case WINED3D_TEXF_NONE:
1143         case WINED3D_TEXF_POINT:
1144             gl_filter = GL_NEAREST;
1145             break;
1146     }
1147
1148     /* Resolve the source surface first if needed. */
1149     if (src_location == SFLAG_INRB_MULTISAMPLE
1150             && (src_surface->resource.format->id != dst_surface->resource.format->id
1151                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1152                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1153         src_location = SFLAG_INRB_RESOLVED;
1154
1155     /* Make sure the locations are up-to-date. Loading the destination
1156      * surface isn't required if the entire surface is overwritten. (And is
1157      * in fact harmful if we're being called by surface_load_location() with
1158      * the purpose of loading the destination surface.) */
1159     surface_load_location(src_surface, src_location, NULL);
1160     if (!surface_is_full_rect(dst_surface, &dst_rect))
1161         surface_load_location(dst_surface, dst_location, NULL);
1162
1163     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1164     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1165     else context = context_acquire(device, NULL);
1166
1167     if (!context->valid)
1168     {
1169         context_release(context);
1170         WARN("Invalid context, skipping blit.\n");
1171         return;
1172     }
1173
1174     gl_info = context->gl_info;
1175
1176     if (src_location == SFLAG_INDRAWABLE)
1177     {
1178         TRACE("Source surface %p is onscreen.\n", src_surface);
1179         buffer = surface_get_gl_buffer(src_surface);
1180         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1181     }
1182     else
1183     {
1184         TRACE("Source surface %p is offscreen.\n", src_surface);
1185         buffer = GL_COLOR_ATTACHMENT0;
1186     }
1187
1188     ENTER_GL();
1189     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1190     glReadBuffer(buffer);
1191     checkGLcall("glReadBuffer()");
1192     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1193     LEAVE_GL();
1194
1195     if (dst_location == SFLAG_INDRAWABLE)
1196     {
1197         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1198         buffer = surface_get_gl_buffer(dst_surface);
1199         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1200     }
1201     else
1202     {
1203         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1204         buffer = GL_COLOR_ATTACHMENT0;
1205     }
1206
1207     ENTER_GL();
1208     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1209     context_set_draw_buffer(context, buffer);
1210     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1211     context_invalidate_state(context, STATE_FRAMEBUFFER);
1212
1213     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1214     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1215     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1216     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1217     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1218
1219     glDisable(GL_SCISSOR_TEST);
1220     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1221
1222     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1223             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1224     checkGLcall("glBlitFramebuffer()");
1225
1226     LEAVE_GL();
1227
1228     if (wined3d_settings.strict_draw_ordering
1229             || (dst_location == SFLAG_INDRAWABLE
1230             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1231         wglFlush();
1232
1233     context_release(context);
1234 }
1235
1236 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1237         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1238         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1239 {
1240     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1241         return FALSE;
1242
1243     /* Source and/or destination need to be on the GL side */
1244     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1245         return FALSE;
1246
1247     switch (blit_op)
1248     {
1249         case WINED3D_BLIT_OP_COLOR_BLIT:
1250             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1251                 return FALSE;
1252             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1253                 return FALSE;
1254             break;
1255
1256         case WINED3D_BLIT_OP_DEPTH_BLIT:
1257             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1258                 return FALSE;
1259             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1260                 return FALSE;
1261             break;
1262
1263         default:
1264             return FALSE;
1265     }
1266
1267     if (!(src_format->id == dst_format->id
1268             || (is_identity_fixup(src_format->color_fixup)
1269             && is_identity_fixup(dst_format->color_fixup))))
1270         return FALSE;
1271
1272     return TRUE;
1273 }
1274
1275 /* This function checks if the primary render target uses the 8bit paletted format. */
1276 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1277 {
1278     if (device->fb.render_targets && device->fb.render_targets[0])
1279     {
1280         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1281         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1282                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1283             return TRUE;
1284     }
1285     return FALSE;
1286 }
1287
1288 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1289         DWORD color, struct wined3d_color *float_color)
1290 {
1291     const struct wined3d_format *format = surface->resource.format;
1292     const struct wined3d_device *device = surface->resource.device;
1293
1294     switch (format->id)
1295     {
1296         case WINED3DFMT_P8_UINT:
1297             if (surface->palette)
1298             {
1299                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1300                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1301                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1302             }
1303             else
1304             {
1305                 float_color->r = 0.0f;
1306                 float_color->g = 0.0f;
1307                 float_color->b = 0.0f;
1308             }
1309             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1310             break;
1311
1312         case WINED3DFMT_B5G6R5_UNORM:
1313             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1314             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1315             float_color->b = (color & 0x1f) / 31.0f;
1316             float_color->a = 1.0f;
1317             break;
1318
1319         case WINED3DFMT_B8G8R8_UNORM:
1320         case WINED3DFMT_B8G8R8X8_UNORM:
1321             float_color->r = D3DCOLOR_R(color);
1322             float_color->g = D3DCOLOR_G(color);
1323             float_color->b = D3DCOLOR_B(color);
1324             float_color->a = 1.0f;
1325             break;
1326
1327         case WINED3DFMT_B8G8R8A8_UNORM:
1328             float_color->r = D3DCOLOR_R(color);
1329             float_color->g = D3DCOLOR_G(color);
1330             float_color->b = D3DCOLOR_B(color);
1331             float_color->a = D3DCOLOR_A(color);
1332             break;
1333
1334         default:
1335             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1336             return FALSE;
1337     }
1338
1339     return TRUE;
1340 }
1341
1342 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1343 {
1344     const struct wined3d_format *format = surface->resource.format;
1345
1346     switch (format->id)
1347     {
1348         case WINED3DFMT_S1_UINT_D15_UNORM:
1349             *float_depth = depth / (float)0x00007fff;
1350             break;
1351
1352         case WINED3DFMT_D16_UNORM:
1353             *float_depth = depth / (float)0x0000ffff;
1354             break;
1355
1356         case WINED3DFMT_D24_UNORM_S8_UINT:
1357         case WINED3DFMT_X8D24_UNORM:
1358             *float_depth = depth / (float)0x00ffffff;
1359             break;
1360
1361         case WINED3DFMT_D32_UNORM:
1362             *float_depth = depth / (float)0xffffffff;
1363             break;
1364
1365         default:
1366             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1367             return FALSE;
1368     }
1369
1370     return TRUE;
1371 }
1372
1373 /* Do not call while under the GL lock. */
1374 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1375 {
1376     const struct wined3d_resource *resource = &surface->resource;
1377     struct wined3d_device *device = resource->device;
1378     const struct blit_shader *blitter;
1379
1380     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1381             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1382     if (!blitter)
1383     {
1384         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1385         return WINED3DERR_INVALIDCALL;
1386     }
1387
1388     return blitter->depth_fill(device, surface, rect, depth);
1389 }
1390
1391 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1392         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1393 {
1394     struct wined3d_device *device = src_surface->resource.device;
1395
1396     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1397             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1398             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1399         return WINED3DERR_INVALIDCALL;
1400
1401     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1402
1403     surface_modify_ds_location(dst_surface, SFLAG_INTEXTURE,
1404             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1405
1406     return WINED3D_OK;
1407 }
1408
1409 /* Do not call while under the GL lock. */
1410 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1411         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1412         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1413 {
1414     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1415     struct wined3d_device *device = dst_surface->resource.device;
1416     DWORD src_ds_flags, dst_ds_flags;
1417     RECT src_rect, dst_rect;
1418     BOOL scale, convert;
1419
1420     static const DWORD simple_blit = WINEDDBLT_ASYNC
1421             | WINEDDBLT_COLORFILL
1422             | WINEDDBLT_WAIT
1423             | WINEDDBLT_DEPTHFILL
1424             | WINEDDBLT_DONOTWAIT;
1425
1426     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1427             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1428             flags, fx, debug_d3dtexturefiltertype(filter));
1429     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1430
1431     if (fx)
1432     {
1433         TRACE("dwSize %#x.\n", fx->dwSize);
1434         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1435         TRACE("dwROP %#x.\n", fx->dwROP);
1436         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1437         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1438         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1439         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1440         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1441         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1442         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1443         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1444         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1445         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1446         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1447         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1448         TRACE("dwReserved %#x.\n", fx->dwReserved);
1449         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1450         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1451         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1452         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1453         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1454         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1455                 fx->ddckDestColorkey.color_space_low_value,
1456                 fx->ddckDestColorkey.color_space_high_value);
1457         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1458                 fx->ddckSrcColorkey.color_space_low_value,
1459                 fx->ddckSrcColorkey.color_space_high_value);
1460     }
1461
1462     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1463     {
1464         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1465         return WINEDDERR_SURFACEBUSY;
1466     }
1467
1468     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1469
1470     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1471             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1472             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1473             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1474             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1475     {
1476         WARN("The application gave us a bad destination rectangle.\n");
1477         return WINEDDERR_INVALIDRECT;
1478     }
1479
1480     if (src_surface)
1481     {
1482         surface_get_rect(src_surface, src_rect_in, &src_rect);
1483
1484         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1485                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1486                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1487                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1488                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1489         {
1490             WARN("Application gave us bad source rectangle for Blt.\n");
1491             return WINEDDERR_INVALIDRECT;
1492         }
1493     }
1494     else
1495     {
1496         memset(&src_rect, 0, sizeof(src_rect));
1497     }
1498
1499     if (!fx || !(fx->dwDDFX))
1500         flags &= ~WINEDDBLT_DDFX;
1501
1502     if (flags & WINEDDBLT_WAIT)
1503         flags &= ~WINEDDBLT_WAIT;
1504
1505     if (flags & WINEDDBLT_ASYNC)
1506     {
1507         static unsigned int once;
1508
1509         if (!once++)
1510             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1511         flags &= ~WINEDDBLT_ASYNC;
1512     }
1513
1514     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1515     if (flags & WINEDDBLT_DONOTWAIT)
1516     {
1517         static unsigned int once;
1518
1519         if (!once++)
1520             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1521         flags &= ~WINEDDBLT_DONOTWAIT;
1522     }
1523
1524     if (!device->d3d_initialized)
1525     {
1526         WARN("D3D not initialized, using fallback.\n");
1527         goto cpu;
1528     }
1529
1530     /* We want to avoid invalidating the sysmem location for converted
1531      * surfaces, since otherwise we'd have to convert the data back when
1532      * locking them. */
1533     if (dst_surface->flags & SFLAG_CONVERTED)
1534     {
1535         WARN("Converted surface, using CPU blit.\n");
1536         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1537     }
1538
1539     if (flags & ~simple_blit)
1540     {
1541         WARN("Using fallback for complex blit (%#x).\n", flags);
1542         goto fallback;
1543     }
1544
1545     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1546         src_swapchain = src_surface->container.u.swapchain;
1547     else
1548         src_swapchain = NULL;
1549
1550     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1551         dst_swapchain = dst_surface->container.u.swapchain;
1552     else
1553         dst_swapchain = NULL;
1554
1555     /* This isn't strictly needed. FBO blits for example could deal with
1556      * cross-swapchain blits by first downloading the source to a texture
1557      * before switching to the destination context. We just have this here to
1558      * not have to deal with the issue, since cross-swapchain blits should be
1559      * rare. */
1560     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1561     {
1562         FIXME("Using fallback for cross-swapchain blit.\n");
1563         goto fallback;
1564     }
1565
1566     scale = src_surface
1567             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1568             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1569     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1570
1571     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1572     if (src_surface)
1573         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1574     else
1575         src_ds_flags = 0;
1576
1577     if (src_ds_flags || dst_ds_flags)
1578     {
1579         if (flags & WINEDDBLT_DEPTHFILL)
1580         {
1581             float depth;
1582
1583             TRACE("Depth fill.\n");
1584
1585             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1586                 return WINED3DERR_INVALIDCALL;
1587
1588             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1589                 return WINED3D_OK;
1590         }
1591         else
1592         {
1593             if (src_ds_flags != dst_ds_flags)
1594             {
1595                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1596                 return WINED3DERR_INVALIDCALL;
1597             }
1598
1599             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1600                 return WINED3D_OK;
1601         }
1602     }
1603     else
1604     {
1605         /* In principle this would apply to depth blits as well, but we don't
1606          * implement those in the CPU blitter at the moment. */
1607         if ((dst_surface->flags & SFLAG_INSYSMEM)
1608                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1609         {
1610             if (scale)
1611                 TRACE("Not doing sysmem blit because of scaling.\n");
1612             else if (convert)
1613                 TRACE("Not doing sysmem blit because of format conversion.\n");
1614             else
1615                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1616         }
1617
1618         if (flags & WINEDDBLT_COLORFILL)
1619         {
1620             struct wined3d_color color;
1621
1622             TRACE("Color fill.\n");
1623
1624             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1625                 goto fallback;
1626
1627             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1628                 return WINED3D_OK;
1629         }
1630         else
1631         {
1632             TRACE("Color blit.\n");
1633
1634             /* Upload */
1635             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1636             {
1637                 if (scale)
1638                     TRACE("Not doing upload because of scaling.\n");
1639                 else if (convert)
1640                     TRACE("Not doing upload because of format conversion.\n");
1641                 else
1642                 {
1643                     POINT dst_point = {dst_rect.left, dst_rect.top};
1644
1645                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1646                     {
1647                         if (!surface_is_offscreen(dst_surface))
1648                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1649                         return WINED3D_OK;
1650                     }
1651                 }
1652             }
1653
1654             /* Use present for back -> front blits. The idea behind this is
1655              * that present is potentially faster than a blit, in particular
1656              * when FBO blits aren't available. Some ddraw applications like
1657              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1658              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1659              * applications can't blit directly to the frontbuffer. */
1660             if (dst_swapchain && dst_swapchain->back_buffers
1661                     && dst_surface == dst_swapchain->front_buffer
1662                     && src_surface == dst_swapchain->back_buffers[0])
1663             {
1664                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1665
1666                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1667
1668                 /* Set the swap effect to COPY, we don't want the backbuffer
1669                  * to become undefined. */
1670                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1671                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1672                 dst_swapchain->desc.swap_effect = swap_effect;
1673
1674                 return WINED3D_OK;
1675             }
1676
1677             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1678                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1679                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1680             {
1681                 TRACE("Using FBO blit.\n");
1682
1683                 surface_blt_fbo(device, filter,
1684                         src_surface, src_surface->draw_binding, &src_rect,
1685                         dst_surface, dst_surface->draw_binding, &dst_rect);
1686                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1687                 return WINED3D_OK;
1688             }
1689
1690             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1691                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1692                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1693             {
1694                 TRACE("Using arbfp blit.\n");
1695
1696                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1697                     return WINED3D_OK;
1698             }
1699         }
1700     }
1701
1702 fallback:
1703
1704     /* Special cases for render targets. */
1705     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1706             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1707     {
1708         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1709                 src_surface, &src_rect, flags, fx, filter)))
1710             return WINED3D_OK;
1711     }
1712
1713 cpu:
1714
1715     /* For the rest call the X11 surface implementation. For render targets
1716      * this should be implemented OpenGL accelerated in BltOverride, other
1717      * blits are rather rare. */
1718     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1719 }
1720
1721 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1722         struct wined3d_surface *render_target)
1723 {
1724     TRACE("surface %p, render_target %p.\n", surface, render_target);
1725
1726     /* TODO: Check surface sizes, pools, etc. */
1727
1728     if (render_target->resource.multisample_type)
1729         return WINED3DERR_INVALIDCALL;
1730
1731     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1732 }
1733
1734 /* Context activation is done by the caller. */
1735 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1736 {
1737     if (surface->flags & SFLAG_DIBSECTION)
1738     {
1739         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1740     }
1741     else
1742     {
1743         if (!surface->resource.heapMemory)
1744             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1745         else if (!(surface->flags & SFLAG_CLIENT))
1746             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1747                     surface, surface->resource.heapMemory, surface->flags);
1748
1749         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1750                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1751     }
1752
1753     ENTER_GL();
1754     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1755     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1756     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1757             surface->resource.size, surface->resource.allocatedMemory));
1758     checkGLcall("glGetBufferSubDataARB");
1759     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1760     checkGLcall("glDeleteBuffersARB");
1761     LEAVE_GL();
1762
1763     surface->pbo = 0;
1764     surface->flags &= ~SFLAG_PBO;
1765 }
1766
1767 /* Do not call while under the GL lock. */
1768 static void surface_unload(struct wined3d_resource *resource)
1769 {
1770     struct wined3d_surface *surface = surface_from_resource(resource);
1771     struct wined3d_renderbuffer_entry *entry, *entry2;
1772     struct wined3d_device *device = resource->device;
1773     const struct wined3d_gl_info *gl_info;
1774     struct wined3d_context *context;
1775
1776     TRACE("surface %p.\n", surface);
1777
1778     if (resource->pool == WINED3D_POOL_DEFAULT)
1779     {
1780         /* Default pool resources are supposed to be destroyed before Reset is called.
1781          * Implicit resources stay however. So this means we have an implicit render target
1782          * or depth stencil. The content may be destroyed, but we still have to tear down
1783          * opengl resources, so we cannot leave early.
1784          *
1785          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1786          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1787          * or the depth stencil into an FBO the texture or render buffer will be removed
1788          * and all flags get lost
1789          */
1790         if (!(surface->flags & SFLAG_PBO))
1791             surface_init_sysmem(surface);
1792         /* We also get here when the ddraw swapchain is destroyed, for example
1793          * for a mode switch. In this case this surface won't necessarily be
1794          * an implicit surface. We have to mark it lost so that the
1795          * application can restore it after the mode switch. */
1796         surface->flags |= SFLAG_LOST;
1797     }
1798     else
1799     {
1800         /* Load the surface into system memory */
1801         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1802         surface_modify_location(surface, surface->draw_binding, FALSE);
1803     }
1804     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1805     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1806     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1807
1808     context = context_acquire(device, NULL);
1809     gl_info = context->gl_info;
1810
1811     /* Destroy PBOs, but load them into real sysmem before */
1812     if (surface->flags & SFLAG_PBO)
1813         surface_remove_pbo(surface, gl_info);
1814
1815     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1816      * all application-created targets the application has to release the surface
1817      * before calling _Reset
1818      */
1819     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1820     {
1821         ENTER_GL();
1822         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1823         LEAVE_GL();
1824         list_remove(&entry->entry);
1825         HeapFree(GetProcessHeap(), 0, entry);
1826     }
1827     list_init(&surface->renderbuffers);
1828     surface->current_renderbuffer = NULL;
1829
1830     ENTER_GL();
1831
1832     /* If we're in a texture, the texture name belongs to the texture.
1833      * Otherwise, destroy it. */
1834     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1835     {
1836         glDeleteTextures(1, &surface->texture_name);
1837         surface->texture_name = 0;
1838         glDeleteTextures(1, &surface->texture_name_srgb);
1839         surface->texture_name_srgb = 0;
1840     }
1841     if (surface->rb_multisample)
1842     {
1843         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1844         surface->rb_multisample = 0;
1845     }
1846     if (surface->rb_resolved)
1847     {
1848         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1849         surface->rb_resolved = 0;
1850     }
1851
1852     LEAVE_GL();
1853
1854     context_release(context);
1855
1856     resource_unload(resource);
1857 }
1858
1859 static const struct wined3d_resource_ops surface_resource_ops =
1860 {
1861     surface_unload,
1862 };
1863
1864 static const struct wined3d_surface_ops surface_ops =
1865 {
1866     surface_private_setup,
1867     surface_realize_palette,
1868     surface_map,
1869     surface_unmap,
1870 };
1871
1872 /*****************************************************************************
1873  * Initializes the GDI surface, aka creates the DIB section we render to
1874  * The DIB section creation is done by calling GetDC, which will create the
1875  * section and releasing the dc to allow the app to use it. The dib section
1876  * will stay until the surface is released
1877  *
1878  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1879  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1880  * avoid confusion in the shared surface code.
1881  *
1882  * Returns:
1883  *  WINED3D_OK on success
1884  *  The return values of called methods on failure
1885  *
1886  *****************************************************************************/
1887 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1888 {
1889     HRESULT hr;
1890
1891     TRACE("surface %p.\n", surface);
1892
1893     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1894     {
1895         ERR("Overlays not yet supported by GDI surfaces.\n");
1896         return WINED3DERR_INVALIDCALL;
1897     }
1898
1899     /* Sysmem textures have memory already allocated - release it,
1900      * this avoids an unnecessary memcpy. */
1901     hr = surface_create_dib_section(surface);
1902     if (SUCCEEDED(hr))
1903     {
1904         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1905         surface->resource.heapMemory = NULL;
1906         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1907     }
1908
1909     /* We don't mind the nonpow2 stuff in GDI. */
1910     surface->pow2Width = surface->resource.width;
1911     surface->pow2Height = surface->resource.height;
1912
1913     return WINED3D_OK;
1914 }
1915
1916 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1917 {
1918     struct wined3d_palette *palette = surface->palette;
1919
1920     TRACE("surface %p.\n", surface);
1921
1922     if (!palette) return;
1923
1924     if (surface->flags & SFLAG_DIBSECTION)
1925     {
1926         RGBQUAD col[256];
1927         unsigned int i;
1928
1929         TRACE("Updating the DC's palette.\n");
1930
1931         for (i = 0; i < 256; ++i)
1932         {
1933             col[i].rgbRed = palette->palents[i].peRed;
1934             col[i].rgbGreen = palette->palents[i].peGreen;
1935             col[i].rgbBlue = palette->palents[i].peBlue;
1936             col[i].rgbReserved = 0;
1937         }
1938         SetDIBColorTable(surface->hDC, 0, 256, col);
1939     }
1940
1941     /* Update the image because of the palette change. Some games like e.g.
1942      * Red Alert call SetEntries a lot to implement fading. */
1943     /* Tell the swapchain to update the screen. */
1944     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1945     {
1946         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1947         if (surface == swapchain->front_buffer)
1948         {
1949             x11_copy_to_screen(swapchain, NULL);
1950         }
1951     }
1952 }
1953
1954 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1955 {
1956     TRACE("surface %p, rect %s, flags %#x.\n",
1957             surface, wine_dbgstr_rect(rect), flags);
1958
1959     if (!(surface->flags & SFLAG_DIBSECTION))
1960     {
1961         HRESULT hr;
1962
1963         /* This happens on gdi surfaces if the application set a user pointer
1964          * and resets it. Recreate the DIB section. */
1965         if (FAILED(hr = surface_create_dib_section(surface)))
1966         {
1967             ERR("Failed to create dib section, hr %#x.\n", hr);
1968             return;
1969         }
1970         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1971         surface->resource.heapMemory = NULL;
1972         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1973     }
1974 }
1975
1976 static void gdi_surface_unmap(struct wined3d_surface *surface)
1977 {
1978     TRACE("surface %p.\n", surface);
1979
1980     /* Tell the swapchain to update the screen. */
1981     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1982     {
1983         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1984         if (surface == swapchain->front_buffer)
1985         {
1986             x11_copy_to_screen(swapchain, &surface->lockedRect);
1987         }
1988     }
1989
1990     memset(&surface->lockedRect, 0, sizeof(RECT));
1991 }
1992
1993 static const struct wined3d_surface_ops gdi_surface_ops =
1994 {
1995     gdi_surface_private_setup,
1996     gdi_surface_realize_palette,
1997     gdi_surface_map,
1998     gdi_surface_unmap,
1999 };
2000
2001 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2002 {
2003     GLuint *name;
2004     DWORD flag;
2005
2006     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2007
2008     if(srgb)
2009     {
2010         name = &surface->texture_name_srgb;
2011         flag = SFLAG_INSRGBTEX;
2012     }
2013     else
2014     {
2015         name = &surface->texture_name;
2016         flag = SFLAG_INTEXTURE;
2017     }
2018
2019     if (!*name && new_name)
2020     {
2021         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2022          * surface has no texture name yet. See if we can get rid of this. */
2023         if (surface->flags & flag)
2024         {
2025             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2026             surface_modify_location(surface, flag, FALSE);
2027         }
2028     }
2029
2030     *name = new_name;
2031     surface_force_reload(surface);
2032 }
2033
2034 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2035 {
2036     TRACE("surface %p, target %#x.\n", surface, target);
2037
2038     if (surface->texture_target != target)
2039     {
2040         if (target == GL_TEXTURE_RECTANGLE_ARB)
2041         {
2042             surface->flags &= ~SFLAG_NORMCOORD;
2043         }
2044         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2045         {
2046             surface->flags |= SFLAG_NORMCOORD;
2047         }
2048     }
2049     surface->texture_target = target;
2050     surface_force_reload(surface);
2051 }
2052
2053 /* Context activation is done by the caller. */
2054 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2055 {
2056     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2057
2058     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2059     {
2060         struct wined3d_texture *texture = surface->container.u.texture;
2061
2062         TRACE("Passing to container (%p).\n", texture);
2063         texture->texture_ops->texture_bind(texture, context, srgb);
2064     }
2065     else
2066     {
2067         if (surface->texture_level)
2068         {
2069             ERR("Standalone surface %p is non-zero texture level %u.\n",
2070                     surface, surface->texture_level);
2071         }
2072
2073         if (srgb)
2074             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2075
2076         ENTER_GL();
2077
2078         if (!surface->texture_name)
2079         {
2080             glGenTextures(1, &surface->texture_name);
2081             checkGLcall("glGenTextures");
2082
2083             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2084
2085             context_bind_texture(context, surface->texture_target, surface->texture_name);
2086             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2087             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2088             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2089             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2090             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2091             checkGLcall("glTexParameteri");
2092         }
2093         else
2094         {
2095             context_bind_texture(context, surface->texture_target, surface->texture_name);
2096         }
2097
2098         LEAVE_GL();
2099     }
2100 }
2101
2102 /* This call just downloads data, the caller is responsible for binding the
2103  * correct texture. */
2104 /* Context activation is done by the caller. */
2105 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2106 {
2107     const struct wined3d_format *format = surface->resource.format;
2108
2109     /* Only support read back of converted P8 surfaces. */
2110     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2111     {
2112         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2113         return;
2114     }
2115
2116     ENTER_GL();
2117
2118     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2119     {
2120         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2121                 surface, surface->texture_level, format->glFormat, format->glType,
2122                 surface->resource.allocatedMemory);
2123
2124         if (surface->flags & SFLAG_PBO)
2125         {
2126             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2127             checkGLcall("glBindBufferARB");
2128             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2129             checkGLcall("glGetCompressedTexImageARB");
2130             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2131             checkGLcall("glBindBufferARB");
2132         }
2133         else
2134         {
2135             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2136                     surface->texture_level, surface->resource.allocatedMemory));
2137             checkGLcall("glGetCompressedTexImageARB");
2138         }
2139
2140         LEAVE_GL();
2141     }
2142     else
2143     {
2144         void *mem;
2145         GLenum gl_format = format->glFormat;
2146         GLenum gl_type = format->glType;
2147         int src_pitch = 0;
2148         int dst_pitch = 0;
2149
2150         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2151         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2152         {
2153             gl_format = GL_ALPHA;
2154             gl_type = GL_UNSIGNED_BYTE;
2155         }
2156
2157         if (surface->flags & SFLAG_NONPOW2)
2158         {
2159             unsigned char alignment = surface->resource.device->surface_alignment;
2160             src_pitch = format->byte_count * surface->pow2Width;
2161             dst_pitch = wined3d_surface_get_pitch(surface);
2162             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2163             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2164         }
2165         else
2166         {
2167             mem = surface->resource.allocatedMemory;
2168         }
2169
2170         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2171                 surface, surface->texture_level, gl_format, gl_type, mem);
2172
2173         if (surface->flags & SFLAG_PBO)
2174         {
2175             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2176             checkGLcall("glBindBufferARB");
2177
2178             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2179             checkGLcall("glGetTexImage");
2180
2181             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2182             checkGLcall("glBindBufferARB");
2183         }
2184         else
2185         {
2186             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2187             checkGLcall("glGetTexImage");
2188         }
2189         LEAVE_GL();
2190
2191         if (surface->flags & SFLAG_NONPOW2)
2192         {
2193             const BYTE *src_data;
2194             BYTE *dst_data;
2195             UINT y;
2196             /*
2197              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2198              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2199              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2200              *
2201              * We're doing this...
2202              *
2203              * instead of boxing the texture :
2204              * |<-texture width ->|  -->pow2width|   /\
2205              * |111111111111111111|              |   |
2206              * |222 Texture 222222| boxed empty  | texture height
2207              * |3333 Data 33333333|              |   |
2208              * |444444444444444444|              |   \/
2209              * -----------------------------------   |
2210              * |     boxed  empty | boxed empty  | pow2height
2211              * |                  |              |   \/
2212              * -----------------------------------
2213              *
2214              *
2215              * we're repacking the data to the expected texture width
2216              *
2217              * |<-texture width ->|  -->pow2width|   /\
2218              * |111111111111111111222222222222222|   |
2219              * |222333333333333333333444444444444| texture height
2220              * |444444                           |   |
2221              * |                                 |   \/
2222              * |                                 |   |
2223              * |            empty                | pow2height
2224              * |                                 |   \/
2225              * -----------------------------------
2226              *
2227              * == is the same as
2228              *
2229              * |<-texture width ->|    /\
2230              * |111111111111111111|
2231              * |222222222222222222|texture height
2232              * |333333333333333333|
2233              * |444444444444444444|    \/
2234              * --------------------
2235              *
2236              * this also means that any references to allocatedMemory should work with the data as if were a
2237              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2238              *
2239              * internally the texture is still stored in a boxed format so any references to textureName will
2240              * get a boxed texture with width pow2width and not a texture of width resource.width.
2241              *
2242              * Performance should not be an issue, because applications normally do not lock the surfaces when
2243              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2244              * and doesn't have to be re-read. */
2245             src_data = mem;
2246             dst_data = surface->resource.allocatedMemory;
2247             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2248             for (y = 1; y < surface->resource.height; ++y)
2249             {
2250                 /* skip the first row */
2251                 src_data += src_pitch;
2252                 dst_data += dst_pitch;
2253                 memcpy(dst_data, src_data, dst_pitch);
2254             }
2255
2256             HeapFree(GetProcessHeap(), 0, mem);
2257         }
2258     }
2259
2260     /* Surface has now been downloaded */
2261     surface->flags |= SFLAG_INSYSMEM;
2262 }
2263
2264 /* This call just uploads data, the caller is responsible for binding the
2265  * correct texture. */
2266 /* Context activation is done by the caller. */
2267 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2268         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2269         BOOL srgb, const struct wined3d_bo_address *data)
2270 {
2271     UINT update_w = src_rect->right - src_rect->left;
2272     UINT update_h = src_rect->bottom - src_rect->top;
2273
2274     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2275             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2276             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2277
2278     if (surface->flags & SFLAG_LOCKED)
2279     {
2280         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2281         surface->flags |= SFLAG_PIN_SYSMEM;
2282     }
2283
2284     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2285         update_h *= format->heightscale;
2286
2287     ENTER_GL();
2288
2289     if (data->buffer_object)
2290     {
2291         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2292         checkGLcall("glBindBufferARB");
2293     }
2294
2295     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2296     {
2297         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2298         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2299         const BYTE *addr = data->addr;
2300         GLenum internal;
2301
2302         addr += (src_rect->top / format->block_height) * src_pitch;
2303         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2304
2305         if (srgb)
2306             internal = format->glGammaInternal;
2307         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2308             internal = format->rtInternal;
2309         else
2310             internal = format->glInternal;
2311
2312         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2313                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2314                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2315
2316         if (row_length == src_pitch)
2317         {
2318             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2319                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2320         }
2321         else
2322         {
2323             UINT row, y;
2324
2325             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2326              * can't use the unpack row length like below. */
2327             for (row = 0, y = dst_point->y; row < row_count; ++row)
2328             {
2329                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2330                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2331                 y += format->block_height;
2332                 addr += src_pitch;
2333             }
2334         }
2335         checkGLcall("glCompressedTexSubImage2DARB");
2336     }
2337     else
2338     {
2339         const BYTE *addr = data->addr;
2340
2341         addr += src_rect->top * src_pitch;
2342         addr += src_rect->left * format->byte_count;
2343
2344         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2345                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2346                 update_w, update_h, format->glFormat, format->glType, addr);
2347
2348         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2349         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2350                 update_w, update_h, format->glFormat, format->glType, addr);
2351         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2352         checkGLcall("glTexSubImage2D");
2353     }
2354
2355     if (data->buffer_object)
2356     {
2357         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2358         checkGLcall("glBindBufferARB");
2359     }
2360
2361     LEAVE_GL();
2362
2363     if (wined3d_settings.strict_draw_ordering)
2364         wglFlush();
2365
2366     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2367     {
2368         struct wined3d_device *device = surface->resource.device;
2369         unsigned int i;
2370
2371         for (i = 0; i < device->context_count; ++i)
2372         {
2373             context_surface_update(device->contexts[i], surface);
2374         }
2375     }
2376 }
2377
2378 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2379         struct wined3d_surface *src_surface, const RECT *src_rect)
2380 {
2381     const struct wined3d_format *src_format;
2382     const struct wined3d_format *dst_format;
2383     const struct wined3d_gl_info *gl_info;
2384     enum wined3d_conversion_type convert;
2385     struct wined3d_context *context;
2386     struct wined3d_bo_address data;
2387     struct wined3d_format format;
2388     UINT update_w, update_h;
2389     UINT dst_w, dst_h;
2390     UINT src_w, src_h;
2391     UINT src_pitch;
2392     POINT p;
2393     RECT r;
2394
2395     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2396             dst_surface, wine_dbgstr_point(dst_point),
2397             src_surface, wine_dbgstr_rect(src_rect));
2398
2399     src_format = src_surface->resource.format;
2400     dst_format = dst_surface->resource.format;
2401
2402     if (src_format->id != dst_format->id)
2403     {
2404         WARN("Source and destination surfaces should have the same format.\n");
2405         return WINED3DERR_INVALIDCALL;
2406     }
2407
2408     if (!dst_point)
2409     {
2410         p.x = 0;
2411         p.y = 0;
2412         dst_point = &p;
2413     }
2414     else if (dst_point->x < 0 || dst_point->y < 0)
2415     {
2416         WARN("Invalid destination point.\n");
2417         return WINED3DERR_INVALIDCALL;
2418     }
2419
2420     if (!src_rect)
2421     {
2422         r.left = 0;
2423         r.top = 0;
2424         r.right = src_surface->resource.width;
2425         r.bottom = src_surface->resource.height;
2426         src_rect = &r;
2427     }
2428     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2429             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2430     {
2431         WARN("Invalid source rectangle.\n");
2432         return WINED3DERR_INVALIDCALL;
2433     }
2434
2435     src_w = src_surface->resource.width;
2436     src_h = src_surface->resource.height;
2437
2438     dst_w = dst_surface->resource.width;
2439     dst_h = dst_surface->resource.height;
2440
2441     update_w = src_rect->right - src_rect->left;
2442     update_h = src_rect->bottom - src_rect->top;
2443
2444     if (update_w > dst_w || dst_point->x > dst_w - update_w
2445             || update_h > dst_h || dst_point->y > dst_h - update_h)
2446     {
2447         WARN("Destination out of bounds.\n");
2448         return WINED3DERR_INVALIDCALL;
2449     }
2450
2451     /* NPOT block sizes would be silly. */
2452     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2453             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2454             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2455     {
2456         WARN("Update rect not block-aligned.\n");
2457         return WINED3DERR_INVALIDCALL;
2458     }
2459
2460     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2461     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2462     if (convert != WINED3D_CT_NONE || format.convert)
2463     {
2464         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2465         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2466     }
2467
2468     context = context_acquire(dst_surface->resource.device, NULL);
2469     gl_info = context->gl_info;
2470
2471     /* Only load the surface for partial updates. For newly allocated texture
2472      * the texture wouldn't be the current location, and we'd upload zeroes
2473      * just to overwrite them again. */
2474     if (update_w == dst_w && update_h == dst_h)
2475         surface_prepare_texture(dst_surface, context, FALSE);
2476     else
2477         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2478     surface_bind(dst_surface, context, FALSE);
2479
2480     data.buffer_object = src_surface->pbo;
2481     data.addr = src_surface->resource.allocatedMemory;
2482     src_pitch = wined3d_surface_get_pitch(src_surface);
2483
2484     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2485
2486     invalidate_active_texture(dst_surface->resource.device, context);
2487
2488     context_release(context);
2489
2490     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2491     return WINED3D_OK;
2492 }
2493
2494 /* This call just allocates the texture, the caller is responsible for binding
2495  * the correct texture. */
2496 /* Context activation is done by the caller. */
2497 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2498         const struct wined3d_format *format, BOOL srgb)
2499 {
2500     BOOL enable_client_storage = FALSE;
2501     GLsizei width = surface->pow2Width;
2502     GLsizei height = surface->pow2Height;
2503     const BYTE *mem = NULL;
2504     GLenum internal;
2505
2506     if (srgb)
2507     {
2508         internal = format->glGammaInternal;
2509     }
2510     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2511     {
2512         internal = format->rtInternal;
2513     }
2514     else
2515     {
2516         internal = format->glInternal;
2517     }
2518
2519     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2520         height *= format->heightscale;
2521
2522     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2523             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2524             internal, width, height, format->glFormat, format->glType);
2525
2526     ENTER_GL();
2527
2528     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2529     {
2530         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2531                 || !surface->resource.allocatedMemory)
2532         {
2533             /* In some cases we want to disable client storage.
2534              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2535              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2536              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2537              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2538              */
2539             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2540             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2541             surface->flags &= ~SFLAG_CLIENT;
2542             enable_client_storage = TRUE;
2543         }
2544         else
2545         {
2546             surface->flags |= SFLAG_CLIENT;
2547
2548             /* Point OpenGL to our allocated texture memory. Do not use
2549              * resource.allocatedMemory here because it might point into a
2550              * PBO. Instead use heapMemory, but get the alignment right. */
2551             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2552                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2553         }
2554     }
2555
2556     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2557     {
2558         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2559                 internal, width, height, 0, surface->resource.size, mem));
2560         checkGLcall("glCompressedTexImage2DARB");
2561     }
2562     else
2563     {
2564         glTexImage2D(surface->texture_target, surface->texture_level,
2565                 internal, width, height, 0, format->glFormat, format->glType, mem);
2566         checkGLcall("glTexImage2D");
2567     }
2568
2569     if(enable_client_storage) {
2570         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2571         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2572     }
2573     LEAVE_GL();
2574 }
2575
2576 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2577  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2578 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2579 /* GL locking is done by the caller */
2580 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2581 {
2582     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2583     struct wined3d_renderbuffer_entry *entry;
2584     GLuint renderbuffer = 0;
2585     unsigned int src_width, src_height;
2586     unsigned int width, height;
2587
2588     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2589     {
2590         width = rt->pow2Width;
2591         height = rt->pow2Height;
2592     }
2593     else
2594     {
2595         width = surface->pow2Width;
2596         height = surface->pow2Height;
2597     }
2598
2599     src_width = surface->pow2Width;
2600     src_height = surface->pow2Height;
2601
2602     /* A depth stencil smaller than the render target is not valid */
2603     if (width > src_width || height > src_height) return;
2604
2605     /* Remove any renderbuffer set if the sizes match */
2606     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2607             || (width == src_width && height == src_height))
2608     {
2609         surface->current_renderbuffer = NULL;
2610         return;
2611     }
2612
2613     /* Look if we've already got a renderbuffer of the correct dimensions */
2614     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2615     {
2616         if (entry->width == width && entry->height == height)
2617         {
2618             renderbuffer = entry->id;
2619             surface->current_renderbuffer = entry;
2620             break;
2621         }
2622     }
2623
2624     if (!renderbuffer)
2625     {
2626         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2627         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2628         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2629                 surface->resource.format->glInternal, width, height);
2630
2631         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2632         entry->width = width;
2633         entry->height = height;
2634         entry->id = renderbuffer;
2635         list_add_head(&surface->renderbuffers, &entry->entry);
2636
2637         surface->current_renderbuffer = entry;
2638     }
2639
2640     checkGLcall("set_compatible_renderbuffer");
2641 }
2642
2643 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2644 {
2645     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2646
2647     TRACE("surface %p.\n", surface);
2648
2649     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2650     {
2651         ERR("Surface %p is not on a swapchain.\n", surface);
2652         return GL_NONE;
2653     }
2654
2655     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2656     {
2657         if (swapchain->render_to_fbo)
2658         {
2659             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2660             return GL_COLOR_ATTACHMENT0;
2661         }
2662         TRACE("Returning GL_BACK\n");
2663         return GL_BACK;
2664     }
2665     else if (surface == swapchain->front_buffer)
2666     {
2667         TRACE("Returning GL_FRONT\n");
2668         return GL_FRONT;
2669     }
2670
2671     FIXME("Higher back buffer, returning GL_BACK\n");
2672     return GL_BACK;
2673 }
2674
2675 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2676 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2677 {
2678     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2679
2680     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2681         /* No partial locking for textures yet. */
2682         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2683
2684     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2685     if (dirty_rect)
2686     {
2687         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2688         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2689         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2690         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2691     }
2692     else
2693     {
2694         surface->dirtyRect.left = 0;
2695         surface->dirtyRect.top = 0;
2696         surface->dirtyRect.right = surface->resource.width;
2697         surface->dirtyRect.bottom = surface->resource.height;
2698     }
2699
2700     /* if the container is a texture then mark it dirty. */
2701     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2702     {
2703         TRACE("Passing to container.\n");
2704         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2705     }
2706 }
2707
2708 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2709 {
2710     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2711     BOOL ck_changed;
2712
2713     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2714
2715     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2716     {
2717         ERR("Not supported on scratch surfaces.\n");
2718         return WINED3DERR_INVALIDCALL;
2719     }
2720
2721     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2722
2723     /* Reload if either the texture and sysmem have different ideas about the
2724      * color key, or the actual key values changed. */
2725     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2726             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2727             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2728     {
2729         TRACE("Reloading because of color keying\n");
2730         /* To perform the color key conversion we need a sysmem copy of
2731          * the surface. Make sure we have it. */
2732
2733         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2734         /* Make sure the texture is reloaded because of the color key change,
2735          * this kills performance though :( */
2736         /* TODO: This is not necessarily needed with hw palettized texture support. */
2737         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2738         /* Switching color keying on / off may change the internal format. */
2739         if (ck_changed)
2740             surface_force_reload(surface);
2741     }
2742     else if (!(surface->flags & flag))
2743     {
2744         TRACE("Reloading because surface is dirty.\n");
2745     }
2746     else
2747     {
2748         TRACE("surface is already in texture\n");
2749         return WINED3D_OK;
2750     }
2751
2752     /* No partial locking for textures yet. */
2753     surface_load_location(surface, flag, NULL);
2754     surface_evict_sysmem(surface);
2755
2756     return WINED3D_OK;
2757 }
2758
/* Convert a 32-bit float to a 16-bit half float: 1 sign bit, 5 exponent bits
 * (excess 15) and 10 mantissa bits.
 *
 * Zero (of either sign) yields 0x0000, NaN yields 0x7c01, infinities and
 * values too large for a half float yield +/-INF. Values too small for a
 * normalized half float are returned as denormals, or zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), so that its integer part is the
     * 11 bit mantissa, including the implicit leading one. At most one of
     * the two loops runs. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        --exp;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        ++exp;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may overflow the 11 bit mantissa (2047.5 -> 2048). Carry
     * into the exponent, instead of silently dropping the overflow bit when
     * masking with 0x3ff below. */
    if (mantissa == 0x800)
    {
        mantissa = 0x400;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2821
2822 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2823 {
2824     ULONG refcount;
2825
2826     TRACE("Surface %p, container %p of type %#x.\n",
2827             surface, surface->container.u.base, surface->container.type);
2828
2829     switch (surface->container.type)
2830     {
2831         case WINED3D_CONTAINER_TEXTURE:
2832             return wined3d_texture_incref(surface->container.u.texture);
2833
2834         case WINED3D_CONTAINER_SWAPCHAIN:
2835             return wined3d_swapchain_incref(surface->container.u.swapchain);
2836
2837         default:
2838             ERR("Unhandled container type %#x.\n", surface->container.type);
2839         case WINED3D_CONTAINER_NONE:
2840             break;
2841     }
2842
2843     refcount = InterlockedIncrement(&surface->resource.ref);
2844     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2845
2846     return refcount;
2847 }
2848
2849 /* Do not call while under the GL lock. */
2850 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2851 {
2852     ULONG refcount;
2853
2854     TRACE("Surface %p, container %p of type %#x.\n",
2855             surface, surface->container.u.base, surface->container.type);
2856
2857     switch (surface->container.type)
2858     {
2859         case WINED3D_CONTAINER_TEXTURE:
2860             return wined3d_texture_decref(surface->container.u.texture);
2861
2862         case WINED3D_CONTAINER_SWAPCHAIN:
2863             return wined3d_swapchain_decref(surface->container.u.swapchain);
2864
2865         default:
2866             ERR("Unhandled container type %#x.\n", surface->container.type);
2867         case WINED3D_CONTAINER_NONE:
2868             break;
2869     }
2870
2871     refcount = InterlockedDecrement(&surface->resource.ref);
2872     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2873
2874     if (!refcount)
2875     {
2876         surface_cleanup(surface);
2877         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2878
2879         TRACE("Destroyed surface %p.\n", surface);
2880         HeapFree(GetProcessHeap(), 0, surface);
2881     }
2882
2883     return refcount;
2884 }
2885
2886 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2887 {
2888     return resource_set_priority(&surface->resource, priority);
2889 }
2890
2891 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2892 {
2893     return resource_get_priority(&surface->resource);
2894 }
2895
2896 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2897 {
2898     TRACE("surface %p.\n", surface);
2899
2900     if (!surface->resource.device->d3d_initialized)
2901     {
2902         ERR("D3D not initialized.\n");
2903         return;
2904     }
2905
2906     surface_internal_preload(surface, SRGB_ANY);
2907 }
2908
2909 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2910 {
2911     TRACE("surface %p.\n", surface);
2912
2913     return surface->resource.parent;
2914 }
2915
2916 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2917 {
2918     TRACE("surface %p.\n", surface);
2919
2920     return &surface->resource;
2921 }
2922
2923 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2924 {
2925     TRACE("surface %p, flags %#x.\n", surface, flags);
2926
2927     switch (flags)
2928     {
2929         case WINEDDGBS_CANBLT:
2930         case WINEDDGBS_ISBLTDONE:
2931             return WINED3D_OK;
2932
2933         default:
2934             return WINED3DERR_INVALIDCALL;
2935     }
2936 }
2937
2938 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2939 {
2940     TRACE("surface %p, flags %#x.\n", surface, flags);
2941
2942     /* XXX: DDERR_INVALIDSURFACETYPE */
2943
2944     switch (flags)
2945     {
2946         case WINEDDGFS_CANFLIP:
2947         case WINEDDGFS_ISFLIPDONE:
2948             return WINED3D_OK;
2949
2950         default:
2951             return WINED3DERR_INVALIDCALL;
2952     }
2953 }
2954
2955 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2956 {
2957     TRACE("surface %p.\n", surface);
2958
2959     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2960     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2961 }
2962
2963 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2964 {
2965     TRACE("surface %p.\n", surface);
2966
2967     surface->flags &= ~SFLAG_LOST;
2968     return WINED3D_OK;
2969 }
2970
2971 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2972 {
2973     TRACE("surface %p, palette %p.\n", surface, palette);
2974
2975     if (surface->palette == palette)
2976     {
2977         TRACE("Nop palette change.\n");
2978         return WINED3D_OK;
2979     }
2980
2981     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2982         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2983
2984     surface->palette = palette;
2985
2986     if (palette)
2987     {
2988         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2989             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2990
2991         surface->surface_ops->surface_realize_palette(surface);
2992     }
2993
2994     return WINED3D_OK;
2995 }
2996
2997 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2998         DWORD flags, const struct wined3d_color_key *color_key)
2999 {
3000     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3001
3002     if (flags & WINEDDCKEY_COLORSPACE)
3003     {
3004         FIXME(" colorkey value not supported (%08x) !\n", flags);
3005         return WINED3DERR_INVALIDCALL;
3006     }
3007
3008     /* Dirtify the surface, but only if a key was changed. */
3009     if (color_key)
3010     {
3011         switch (flags & ~WINEDDCKEY_COLORSPACE)
3012         {
3013             case WINEDDCKEY_DESTBLT:
3014                 surface->dst_blt_color_key = *color_key;
3015                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3016                 break;
3017
3018             case WINEDDCKEY_DESTOVERLAY:
3019                 surface->dst_overlay_color_key = *color_key;
3020                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3021                 break;
3022
3023             case WINEDDCKEY_SRCOVERLAY:
3024                 surface->src_overlay_color_key = *color_key;
3025                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3026                 break;
3027
3028             case WINEDDCKEY_SRCBLT:
3029                 surface->src_blt_color_key = *color_key;
3030                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3031                 break;
3032         }
3033     }
3034     else
3035     {
3036         switch (flags & ~WINEDDCKEY_COLORSPACE)
3037         {
3038             case WINEDDCKEY_DESTBLT:
3039                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3040                 break;
3041
3042             case WINEDDCKEY_DESTOVERLAY:
3043                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3044                 break;
3045
3046             case WINEDDCKEY_SRCOVERLAY:
3047                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3048                 break;
3049
3050             case WINEDDCKEY_SRCBLT:
3051                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3052                 break;
3053         }
3054     }
3055
3056     return WINED3D_OK;
3057 }
3058
3059 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3060 {
3061     TRACE("surface %p.\n", surface);
3062
3063     return surface->palette;
3064 }
3065
3066 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3067 {
3068     const struct wined3d_format *format = surface->resource.format;
3069     DWORD pitch;
3070
3071     TRACE("surface %p.\n", surface);
3072
3073     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3074     {
3075         /* Since compressed formats are block based, pitch means the amount of
3076          * bytes to the next row of block rather than the next row of pixels. */
3077         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3078         pitch = row_block_count * format->block_byte_count;
3079     }
3080     else
3081     {
3082         unsigned char alignment = surface->resource.device->surface_alignment;
3083         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3084         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3085     }
3086
3087     TRACE("Returning %u.\n", pitch);
3088
3089     return pitch;
3090 }
3091
3092 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3093 {
3094     TRACE("surface %p, mem %p.\n", surface, mem);
3095
3096     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3097     {
3098         WARN("Surface is locked or the DC is in use.\n");
3099         return WINED3DERR_INVALIDCALL;
3100     }
3101
3102     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3103     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3104     {
3105         ERR("Not supported on render targets.\n");
3106         return WINED3DERR_INVALIDCALL;
3107     }
3108
3109     if (mem && mem != surface->resource.allocatedMemory)
3110     {
3111         void *release = NULL;
3112
3113         /* Do I have to copy the old surface content? */
3114         if (surface->flags & SFLAG_DIBSECTION)
3115         {
3116             DeleteDC(surface->hDC);
3117             DeleteObject(surface->dib.DIBsection);
3118             surface->dib.bitmap_data = NULL;
3119             surface->resource.allocatedMemory = NULL;
3120             surface->hDC = NULL;
3121             surface->flags &= ~SFLAG_DIBSECTION;
3122         }
3123         else if (!(surface->flags & SFLAG_USERPTR))
3124         {
3125             release = surface->resource.heapMemory;
3126             surface->resource.heapMemory = NULL;
3127         }
3128         surface->resource.allocatedMemory = mem;
3129         surface->flags |= SFLAG_USERPTR;
3130
3131         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3132         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3133
3134         /* For client textures OpenGL has to be notified. */
3135         if (surface->flags & SFLAG_CLIENT)
3136             surface_release_client_storage(surface);
3137
3138         /* Now free the old memory if any. */
3139         HeapFree(GetProcessHeap(), 0, release);
3140     }
3141     else if (surface->flags & SFLAG_USERPTR)
3142     {
3143         /* HeapMemory should be NULL already. */
3144         if (surface->resource.heapMemory)
3145             ERR("User pointer surface has heap memory allocated.\n");
3146
3147         if (!mem)
3148         {
3149             surface->resource.allocatedMemory = NULL;
3150             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3151
3152             if (surface->flags & SFLAG_CLIENT)
3153                 surface_release_client_storage(surface);
3154
3155             surface_prepare_system_memory(surface);
3156         }
3157
3158         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3159     }
3160
3161     return WINED3D_OK;
3162 }
3163
3164 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3165 {
3166     LONG w, h;
3167
3168     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3169
3170     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3171     {
3172         WARN("Not an overlay surface.\n");
3173         return WINEDDERR_NOTAOVERLAYSURFACE;
3174     }
3175
3176     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3177     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3178     surface->overlay_destrect.left = x;
3179     surface->overlay_destrect.top = y;
3180     surface->overlay_destrect.right = x + w;
3181     surface->overlay_destrect.bottom = y + h;
3182
3183     surface_draw_overlay(surface);
3184
3185     return WINED3D_OK;
3186 }
3187
3188 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3189 {
3190     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3191
3192     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3193     {
3194         TRACE("Not an overlay surface.\n");
3195         return WINEDDERR_NOTAOVERLAYSURFACE;
3196     }
3197
3198     if (!surface->overlay_dest)
3199     {
3200         TRACE("Overlay not visible.\n");
3201         *x = 0;
3202         *y = 0;
3203         return WINEDDERR_OVERLAYNOTVISIBLE;
3204     }
3205
3206     *x = surface->overlay_destrect.left;
3207     *y = surface->overlay_destrect.top;
3208
3209     TRACE("Returning position %d, %d.\n", *x, *y);
3210
3211     return WINED3D_OK;
3212 }
3213
3214 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3215         DWORD flags, struct wined3d_surface *ref)
3216 {
3217     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3218
3219     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3220     {
3221         TRACE("Not an overlay surface.\n");
3222         return WINEDDERR_NOTAOVERLAYSURFACE;
3223     }
3224
3225     return WINED3D_OK;
3226 }
3227
3228 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3229         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3230 {
3231     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3232             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3233
3234     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3235     {
3236         WARN("Not an overlay surface.\n");
3237         return WINEDDERR_NOTAOVERLAYSURFACE;
3238     }
3239     else if (!dst_surface)
3240     {
3241         WARN("Dest surface is NULL.\n");
3242         return WINED3DERR_INVALIDCALL;
3243     }
3244
3245     if (src_rect)
3246     {
3247         surface->overlay_srcrect = *src_rect;
3248     }
3249     else
3250     {
3251         surface->overlay_srcrect.left = 0;
3252         surface->overlay_srcrect.top = 0;
3253         surface->overlay_srcrect.right = surface->resource.width;
3254         surface->overlay_srcrect.bottom = surface->resource.height;
3255     }
3256
3257     if (dst_rect)
3258     {
3259         surface->overlay_destrect = *dst_rect;
3260     }
3261     else
3262     {
3263         surface->overlay_destrect.left = 0;
3264         surface->overlay_destrect.top = 0;
3265         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3266         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3267     }
3268
3269     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3270     {
3271         surface->overlay_dest = NULL;
3272         list_remove(&surface->overlay_entry);
3273     }
3274
3275     if (flags & WINEDDOVER_SHOW)
3276     {
3277         if (surface->overlay_dest != dst_surface)
3278         {
3279             surface->overlay_dest = dst_surface;
3280             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3281         }
3282     }
3283     else if (flags & WINEDDOVER_HIDE)
3284     {
3285         /* tests show that the rectangles are erased on hide */
3286         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3287         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3288         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3289         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3290         surface->overlay_dest = NULL;
3291     }
3292
3293     surface_draw_overlay(surface);
3294
3295     return WINED3D_OK;
3296 }
3297
3298 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3299         UINT width, UINT height, enum wined3d_format_id format_id,
3300         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3301 {
3302     struct wined3d_device *device = surface->resource.device;
3303     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3304     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3305     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3306
3307     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3308             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3309
3310     if (!resource_size)
3311         return WINED3DERR_INVALIDCALL;
3312
3313     if (device->d3d_initialized)
3314         surface->resource.resource_ops->resource_unload(&surface->resource);
3315
3316     if (surface->flags & SFLAG_DIBSECTION)
3317     {
3318         DeleteDC(surface->hDC);
3319         DeleteObject(surface->dib.DIBsection);
3320         surface->dib.bitmap_data = NULL;
3321         surface->flags &= ~SFLAG_DIBSECTION;
3322     }
3323
3324     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3325     surface->resource.allocatedMemory = NULL;
3326     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3327     surface->resource.heapMemory = NULL;
3328
3329     surface->resource.width = width;
3330     surface->resource.height = height;
3331     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3332             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3333     {
3334         surface->pow2Width = width;
3335         surface->pow2Height = height;
3336     }
3337     else
3338     {
3339         surface->pow2Width = surface->pow2Height = 1;
3340         while (surface->pow2Width < width)
3341             surface->pow2Width <<= 1;
3342         while (surface->pow2Height < height)
3343             surface->pow2Height <<= 1;
3344     }
3345
3346     if (surface->pow2Width != width || surface->pow2Height != height)
3347         surface->flags |= SFLAG_NONPOW2;
3348     else
3349         surface->flags &= ~SFLAG_NONPOW2;
3350
3351     surface->resource.format = format;
3352     surface->resource.multisample_type = multisample_type;
3353     surface->resource.multisample_quality = multisample_quality;
3354     surface->resource.size = resource_size;
3355
3356     if (!surface_init_sysmem(surface))
3357         return E_OUTOFMEMORY;
3358
3359     return WINED3D_OK;
3360 }
3361
3362 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3363         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3364 {
3365     unsigned short *dst_s;
3366     const float *src_f;
3367     unsigned int x, y;
3368
3369     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3370
3371     for (y = 0; y < h; ++y)
3372     {
3373         src_f = (const float *)(src + y * pitch_in);
3374         dst_s = (unsigned short *) (dst + y * pitch_out);
3375         for (x = 0; x < w; ++x)
3376         {
3377             dst_s[x] = float_32_to_16(src_f + x);
3378         }
3379     }
3380 }
3381
3382 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3383         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3384 {
3385     static const unsigned char convert_5to8[] =
3386     {
3387         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3388         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3389         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3390         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3391     };
3392     static const unsigned char convert_6to8[] =
3393     {
3394         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3395         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3396         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3397         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3398         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3399         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3400         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3401         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3402     };
3403     unsigned int x, y;
3404
3405     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3406
3407     for (y = 0; y < h; ++y)
3408     {
3409         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3410         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3411         for (x = 0; x < w; ++x)
3412         {
3413             WORD pixel = src_line[x];
3414             dst_line[x] = 0xff000000
3415                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3416                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3417                     | convert_5to8[(pixel & 0x001f)];
3418         }
3419     }
3420 }
3421
3422 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3423  * in both cases we're just setting the X / Alpha channel to 0xff. */
3424 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3425         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3426 {
3427     unsigned int x, y;
3428
3429     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3430
3431     for (y = 0; y < h; ++y)
3432     {
3433         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3434         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3435
3436         for (x = 0; x < w; ++x)
3437         {
3438             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3439         }
3440     }
3441 }
3442
3443 static inline BYTE cliptobyte(int x)
3444 {
3445     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3446 }
3447
3448 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3449         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3450 {
3451     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3452     unsigned int x, y;
3453
3454     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3455
3456     for (y = 0; y < h; ++y)
3457     {
3458         const BYTE *src_line = src + y * pitch_in;
3459         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3460         for (x = 0; x < w; ++x)
3461         {
3462             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3463              *     C = Y - 16; D = U - 128; E = V - 128;
3464              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3465              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3466              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3467              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3468              * U and V are shared between the pixels. */
3469             if (!(x & 1)) /* For every even pixel, read new U and V. */
3470             {
3471                 d = (int) src_line[1] - 128;
3472                 e = (int) src_line[3] - 128;
3473                 r2 = 409 * e + 128;
3474                 g2 = - 100 * d - 208 * e + 128;
3475                 b2 = 516 * d + 128;
3476             }
3477             c2 = 298 * ((int) src_line[0] - 16);
3478             dst_line[x] = 0xff000000
3479                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3480                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3481                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3482                 /* Scale RGB values to 0..255 range,
3483                  * then clip them if still not in range (may be negative),
3484                  * then shift them within DWORD if necessary. */
3485             src_line += 2;
3486         }
3487     }
3488 }
3489
3490 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3491         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3492 {
3493     unsigned int x, y;
3494     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3495
3496     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3497
3498     for (y = 0; y < h; ++y)
3499     {
3500         const BYTE *src_line = src + y * pitch_in;
3501         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3502         for (x = 0; x < w; ++x)
3503         {
3504             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3505              *     C = Y - 16; D = U - 128; E = V - 128;
3506              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3507              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3508              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3509              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3510              * U and V are shared between the pixels. */
3511             if (!(x & 1)) /* For every even pixel, read new U and V. */
3512             {
3513                 d = (int) src_line[1] - 128;
3514                 e = (int) src_line[3] - 128;
3515                 r2 = 409 * e + 128;
3516                 g2 = - 100 * d - 208 * e + 128;
3517                 b2 = 516 * d + 128;
3518             }
3519             c2 = 298 * ((int) src_line[0] - 16);
3520             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3521                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3522                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3523                 /* Scale RGB values to 0..255 range,
3524                  * then clip them if still not in range (may be negative),
3525                  * then shift them within DWORD if necessary. */
3526             src_line += 2;
3527         }
3528     }
3529 }
3530
3531 struct d3dfmt_convertor_desc
3532 {
3533     enum wined3d_format_id from, to;
3534     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3535 };
3536
/* The supported (source format, destination format) pairs together with the
 * routine handling each of them. find_convertor() searches this table
 * linearly; the B8G8R8A8 <-> B8G8R8X8 pair shares a single routine. */
static const struct d3dfmt_convertor_desc convertors[] =
{
    {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
    {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
    {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
};
3546
3547 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3548         enum wined3d_format_id to)
3549 {
3550     unsigned int i;
3551
3552     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3553     {
3554         if (convertors[i].from == from && convertors[i].to == to)
3555             return &convertors[i];
3556     }
3557
3558     return NULL;
3559 }
3560
3561 /*****************************************************************************
3562  * surface_convert_format
3563  *
3564  * Creates a duplicate of a surface in a different format. Is used by Blt to
3565  * blit between surfaces with different formats.
3566  *
3567  * Parameters
3568  *  source: Source surface
3569  *  fmt: Requested destination format
3570  *
3571  *****************************************************************************/
3572 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3573 {
3574     struct wined3d_mapped_rect src_map, dst_map;
3575     const struct d3dfmt_convertor_desc *conv;
3576     struct wined3d_surface *ret = NULL;
3577     HRESULT hr;
3578
3579     conv = find_convertor(source->resource.format->id, to_fmt);
3580     if (!conv)
3581     {
3582         FIXME("Cannot find a conversion function from format %s to %s.\n",
3583                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3584         return NULL;
3585     }
3586
3587     wined3d_surface_create(source->resource.device, source->resource.width,
3588             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3D_POOL_SCRATCH,
3589             WINED3D_MULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3590             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3591             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3592     if (!ret)
3593     {
3594         ERR("Failed to create a destination surface for conversion.\n");
3595         return NULL;
3596     }
3597
3598     memset(&src_map, 0, sizeof(src_map));
3599     memset(&dst_map, 0, sizeof(dst_map));
3600
3601     hr = wined3d_surface_map(source, &src_map, NULL, WINED3DLOCK_READONLY);
3602     if (FAILED(hr))
3603     {
3604         ERR("Failed to lock the source surface.\n");
3605         wined3d_surface_decref(ret);
3606         return NULL;
3607     }
3608     hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3DLOCK_READONLY);
3609     if (FAILED(hr))
3610     {
3611         ERR("Failed to lock the destination surface.\n");
3612         wined3d_surface_unmap(source);
3613         wined3d_surface_decref(ret);
3614         return NULL;
3615     }
3616
3617     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3618             source->resource.width, source->resource.height);
3619
3620     wined3d_surface_unmap(ret);
3621     wined3d_surface_unmap(source);
3622
3623     return ret;
3624 }
3625
3626 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3627         unsigned int bpp, UINT pitch, DWORD color)
3628 {
3629     BYTE *first;
3630     int x, y;
3631
3632     /* Do first row */
3633
3634 #define COLORFILL_ROW(type) \
3635 do { \
3636     type *d = (type *)buf; \
3637     for (x = 0; x < width; ++x) \
3638         d[x] = (type)color; \
3639 } while(0)
3640
3641     switch (bpp)
3642     {
3643         case 1:
3644             COLORFILL_ROW(BYTE);
3645             break;
3646
3647         case 2:
3648             COLORFILL_ROW(WORD);
3649             break;
3650
3651         case 3:
3652         {
3653             BYTE *d = buf;
3654             for (x = 0; x < width; ++x, d += 3)
3655             {
3656                 d[0] = (color      ) & 0xFF;
3657                 d[1] = (color >>  8) & 0xFF;
3658                 d[2] = (color >> 16) & 0xFF;
3659             }
3660             break;
3661         }
3662         case 4:
3663             COLORFILL_ROW(DWORD);
3664             break;
3665
3666         default:
3667             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3668             return WINED3DERR_NOTAVAILABLE;
3669     }
3670
3671 #undef COLORFILL_ROW
3672
3673     /* Now copy first row. */
3674     first = buf;
3675     for (y = 1; y < height; ++y)
3676     {
3677         buf += pitch;
3678         memcpy(buf, first, width * bpp);
3679     }
3680
3681     return WINED3D_OK;
3682 }
3683
3684 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3685 {
3686     TRACE("surface %p.\n", surface);
3687
3688     if (!(surface->flags & SFLAG_LOCKED))
3689     {
3690         WARN("Trying to unmap unmapped surface.\n");
3691         return WINEDDERR_NOTLOCKED;
3692     }
3693     surface->flags &= ~SFLAG_LOCKED;
3694
3695     surface->surface_ops->surface_unmap(surface);
3696
3697     return WINED3D_OK;
3698 }
3699
/* Map a surface for CPU access.
 *
 * On success mapped_rect receives the row pitch and a pointer into the
 * surface's system memory: the start of the memory when rect is NULL,
 * otherwise the position of the rect's top-left pixel (or of the block
 * containing it for block based formats). The mapped area is remembered in
 * surface->lockedRect and SFLAG_LOCKED is set until wined3d_surface_unmap().
 *
 * Returns WINED3DERR_INVALIDCALL if the surface is already mapped, or if a
 * partial rect on a block based format is not block aligned and the surface
 * is in WINED3D_POOL_DEFAULT; WINED3D_OK otherwise. */
HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
        struct wined3d_mapped_rect *mapped_rect, const RECT *rect, DWORD flags)
{
    const struct wined3d_format *format = surface->resource.format;

    TRACE("surface %p, mapped_rect %p, rect %s, flags %#x.\n",
            surface, mapped_rect, wine_dbgstr_rect(rect), flags);

    if (surface->flags & SFLAG_LOCKED)
    {
        WARN("Surface is already mapped.\n");
        return WINED3DERR_INVALIDCALL;
    }
    /* For block based formats, a rect that doesn't cover the entire surface
     * is checked for block alignment. */
    if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
            && rect && (rect->left || rect->top
            || rect->right != surface->resource.width
            || rect->bottom != surface->resource.height))
    {
        /* NOTE(review): the masks assume block dimensions that are powers of
         * two - confirm against the format table. */
        UINT width_mask = format->block_width - 1;
        UINT height_mask = format->block_height - 1;

        if ((rect->left & width_mask) || (rect->right & width_mask)
                || (rect->top & height_mask) || (rect->bottom & height_mask))
        {
            WARN("Map rect %s is misaligned for %ux%u blocks.\n",
                    wine_dbgstr_rect(rect), format->block_width, format->block_height);

            /* Only rejected for the default pool; other pools just get the
             * warning above. */
            if (surface->resource.pool == WINED3D_POOL_DEFAULT)
                return WINED3DERR_INVALIDCALL;
        }
    }

    surface->flags |= SFLAG_LOCKED;

    /* Mapping a surface without SFLAG_LOCKABLE only produces a warning. */
    if (!(surface->flags & SFLAG_LOCKABLE))
        WARN("Trying to lock unlockable surface.\n");

    /* Performance optimization: Count how often a surface is mapped, if it is
     * mapped regularly do not throw away the system memory copy. This avoids
     * the need to download the surface from OpenGL all the time. The surface
     * is still downloaded if the OpenGL texture is changed. */
    if (!(surface->flags & SFLAG_DYNLOCK))
    {
        if (++surface->lockCount > MAXLOCKCOUNT)
        {
            TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
            surface->flags |= SFLAG_DYNLOCK;
        }
    }

    surface->surface_ops->surface_map(surface, rect, flags);

    /* Formats flagged WINED3DFMT_FLAG_BROKEN_PITCH report width * byte_count
     * instead of the pitch from wined3d_surface_get_pitch(). */
    if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
        mapped_rect->row_pitch = surface->resource.width * format->byte_count;
    else
        mapped_rect->row_pitch = wined3d_surface_get_pitch(surface);

    if (!rect)
    {
        /* No rect: the whole surface is mapped. */
        mapped_rect->data = surface->resource.allocatedMemory;
        surface->lockedRect.left = 0;
        surface->lockedRect.top = 0;
        surface->lockedRect.right = surface->resource.width;
        surface->lockedRect.bottom = surface->resource.height;
    }
    else
    {
        if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
        {
            /* Compressed textures are block based, so calculate the offset of
             * the block that contains the top-left pixel of the locked rectangle. */
            mapped_rect->data = surface->resource.allocatedMemory
                    + ((rect->top / format->block_height) * mapped_rect->row_pitch)
                    + ((rect->left / format->block_width) * format->block_byte_count);
        }
        else
        {
            /* Plain per-pixel addressing. */
            mapped_rect->data = surface->resource.allocatedMemory
                    + (mapped_rect->row_pitch * rect->top)
                    + (rect->left * format->byte_count);
        }
        surface->lockedRect.left = rect->left;
        surface->lockedRect.top = rect->top;
        surface->lockedRect.right = rect->right;
        surface->lockedRect.bottom = rect->bottom;
    }

    TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
    TRACE("Returning memory %p, pitch %u.\n", mapped_rect->data, mapped_rect->row_pitch);

    return WINED3D_OK;
}
3792
/* Get a GDI device context for the surface.
 *
 * A DIB section is created on first use. The surface is mapped here and
 * stays mapped, with SFLAG_DCINUSE set, until wined3d_surface_releasedc().
 *
 * Returns WINEDDERR_NODC for surfaces using application-provided memory,
 * WINEDDERR_DCALREADYCREATED if the DC is already in use,
 * WINED3DERR_INVALIDCALL if the surface is locked or the DIB section can't
 * be created, and the error from wined3d_surface_map() if mapping fails. */
HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
{
    struct wined3d_mapped_rect map;
    HRESULT hr;

    TRACE("surface %p, dc %p.\n", surface, dc);

    if (surface->flags & SFLAG_USERPTR)
    {
        ERR("Not supported on surfaces with application-provided memory.\n");
        return WINEDDERR_NODC;
    }

    /* Give more detailed info for ddraw. */
    if (surface->flags & SFLAG_DCINUSE)
        return WINEDDERR_DCALREADYCREATED;

    /* Can't GetDC if the surface is locked. */
    if (surface->flags & SFLAG_LOCKED)
        return WINED3DERR_INVALIDCALL;

    /* Create a DIB section if there isn't a dc yet. */
    if (!surface->hDC)
    {
        if (surface->flags & SFLAG_CLIENT)
        {
            /* Bring the system memory copy up to date before the client
             * storage is released. */
            surface_load_location(surface, SFLAG_INSYSMEM, NULL);
            surface_release_client_storage(surface);
        }
        hr = surface_create_dib_section(surface);
        if (FAILED(hr))
            return WINED3DERR_INVALIDCALL;

        /* Use the DIB section from now on if we are not using a PBO. */
        if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
        {
            HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
            surface->resource.heapMemory = NULL;
            surface->resource.allocatedMemory = surface->dib.bitmap_data;
        }
    }

    /* Map the surface. */
    hr = wined3d_surface_map(surface, &map, NULL, 0);
    if (FAILED(hr))
    {
        ERR("Map failed, hr %#x.\n", hr);
        return hr;
    }

    /* Sync the DIB with the PBO. This can't be done earlier because Map()
     * activates the allocatedMemory. */
    if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
        memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);

    if (surface->resource.format->id == WINED3DFMT_P8_UINT
            || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
    {
        /* GetDC on palettized formats is unsupported in D3D9, and the method
         * is missing in D3D8, so this should only be used for DX <=7
         * surfaces (with non-device palettes). */
        const PALETTEENTRY *pal = NULL;

        /* Prefer the surface's own palette, fall back to the palette of the
         * first swapchain's front buffer. */
        if (surface->palette)
        {
            pal = surface->palette->palents;
        }
        else
        {
            struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
            struct wined3d_surface *dds_primary = swapchain->front_buffer;

            if (dds_primary && dds_primary->palette)
                pal = dds_primary->palette->palents;
        }

        if (pal)
        {
            RGBQUAD col[256];
            unsigned int i;

            /* Copy the palette entries into the DC's colour table. */
            for (i = 0; i < 256; ++i)
            {
                col[i].rgbRed = pal[i].peRed;
                col[i].rgbGreen = pal[i].peGreen;
                col[i].rgbBlue = pal[i].peBlue;
                col[i].rgbReserved = 0;
            }
            SetDIBColorTable(surface->hDC, 0, 256, col);
        }
    }

    surface->flags |= SFLAG_DCINUSE;

    *dc = surface->hDC;
    TRACE("Returning dc %p.\n", *dc);

    return WINED3D_OK;
}
3892
3893 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3894 {
3895     TRACE("surface %p, dc %p.\n", surface, dc);
3896
3897     if (!(surface->flags & SFLAG_DCINUSE))
3898         return WINEDDERR_NODC;
3899
3900     if (surface->hDC != dc)
3901     {
3902         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3903                 dc, surface->hDC);
3904         return WINEDDERR_NODC;
3905     }
3906
3907     /* Copy the contents of the DIB over to the PBO. */
3908     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
3909         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3910
3911     /* We locked first, so unlock now. */
3912     wined3d_surface_unmap(surface);
3913
3914     surface->flags &= ~SFLAG_DCINUSE;
3915
3916     return WINED3D_OK;
3917 }
3918
3919 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3920 {
3921     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3922
3923     if (flags)
3924     {
3925         static UINT once;
3926         if (!once++)
3927             FIXME("Ignoring flags %#x.\n", flags);
3928         else
3929             WARN("Ignoring flags %#x.\n", flags);
3930     }
3931
3932     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3933     {
3934         ERR("Not supported on swapchain surfaces.\n");
3935         return WINEDDERR_NOTFLIPPABLE;
3936     }
3937
3938     /* Flipping is only supported on render targets and overlays. */
3939     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3940     {
3941         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3942         return WINEDDERR_NOTFLIPPABLE;
3943     }
3944
3945     flip_surface(surface, override);
3946
3947     /* Update overlays if they're visible. */
3948     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3949         return surface_draw_overlay(surface);
3950
3951     return WINED3D_OK;
3952 }
3953
3954 /* Do not call while under the GL lock. */
3955 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3956 {
3957     struct wined3d_device *device = surface->resource.device;
3958
3959     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3960
3961     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3962     {
3963         struct wined3d_texture *texture = surface->container.u.texture;
3964
3965         TRACE("Passing to container (%p).\n", texture);
3966         texture->texture_ops->texture_preload(texture, srgb);
3967     }
3968     else
3969     {
3970         struct wined3d_context *context;
3971
3972         TRACE("(%p) : About to load surface\n", surface);
3973
3974         /* TODO: Use already acquired context when possible. */
3975         context = context_acquire(device, NULL);
3976
3977         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3978
3979         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3980         {
3981             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3982             GLclampf tmp;
3983             tmp = 0.9f;
3984             ENTER_GL();
3985             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3986             LEAVE_GL();
3987         }
3988
3989         context_release(context);
3990     }
3991 }
3992
3993 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3994 {
3995     if (!surface->resource.allocatedMemory)
3996     {
3997         if (!surface->resource.heapMemory)
3998         {
3999             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
4000                     surface->resource.size + RESOURCE_ALIGNMENT)))
4001             {
4002                 ERR("Failed to allocate memory.\n");
4003                 return FALSE;
4004             }
4005         }
4006         else if (!(surface->flags & SFLAG_CLIENT))
4007         {
4008             ERR("Surface %p has heapMemory %p and flags %#x.\n",
4009                     surface, surface->resource.heapMemory, surface->flags);
4010         }
4011
4012         surface->resource.allocatedMemory =
4013             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
4014     }
4015     else
4016     {
4017         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
4018     }
4019
4020     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
4021
4022     return TRUE;
4023 }
4024
4025 /* Read the framebuffer back into the surface */
4026 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4027 {
4028     struct wined3d_device *device = surface->resource.device;
4029     const struct wined3d_gl_info *gl_info;
4030     struct wined3d_context *context;
4031     BYTE *mem;
4032     GLint fmt;
4033     GLint type;
4034     BYTE *row, *top, *bottom;
4035     int i;
4036     BOOL bpp;
4037     RECT local_rect;
4038     BOOL srcIsUpsideDown;
4039     GLint rowLen = 0;
4040     GLint skipPix = 0;
4041     GLint skipRow = 0;
4042
4043     context = context_acquire(device, surface);
4044     context_apply_blit_state(context, device);
4045     gl_info = context->gl_info;
4046
4047     ENTER_GL();
4048
4049     /* Select the correct read buffer, and give some debug output.
4050      * There is no need to keep track of the current read buffer or reset it, every part of the code
4051      * that reads sets the read buffer as desired.
4052      */
4053     if (surface_is_offscreen(surface))
4054     {
4055         /* Mapping the primary render target which is not on a swapchain.
4056          * Read from the back buffer. */
4057         TRACE("Mapping offscreen render target.\n");
4058         glReadBuffer(device->offscreenBuffer);
4059         srcIsUpsideDown = TRUE;
4060     }
4061     else
4062     {
4063         /* Onscreen surfaces are always part of a swapchain */
4064         GLenum buffer = surface_get_gl_buffer(surface);
4065         TRACE("Mapping %#x buffer.\n", buffer);
4066         glReadBuffer(buffer);
4067         checkGLcall("glReadBuffer");
4068         srcIsUpsideDown = FALSE;
4069     }
4070
4071     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4072     if (!rect)
4073     {
4074         local_rect.left = 0;
4075         local_rect.top = 0;
4076         local_rect.right = surface->resource.width;
4077         local_rect.bottom = surface->resource.height;
4078     }
4079     else
4080     {
4081         local_rect = *rect;
4082     }
4083     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4084
4085     switch (surface->resource.format->id)
4086     {
4087         case WINED3DFMT_P8_UINT:
4088         {
4089             if (primary_render_target_is_p8(device))
4090             {
4091                 /* In case of P8 render targets the index is stored in the alpha component */
4092                 fmt = GL_ALPHA;
4093                 type = GL_UNSIGNED_BYTE;
4094                 mem = dest;
4095                 bpp = surface->resource.format->byte_count;
4096             }
4097             else
4098             {
4099                 /* GL can't return palettized data, so read ARGB pixels into a
4100                  * separate block of memory and convert them into palettized format
4101                  * in software. Slow, but if the app means to use palettized render
4102                  * targets and locks it...
4103                  *
4104                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4105                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4106                  * for the color channels when palettizing the colors.
4107                  */
4108                 fmt = GL_RGB;
4109                 type = GL_UNSIGNED_BYTE;
4110                 pitch *= 3;
4111                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4112                 if (!mem)
4113                 {
4114                     ERR("Out of memory\n");
4115                     LEAVE_GL();
4116                     return;
4117                 }
4118                 bpp = surface->resource.format->byte_count * 3;
4119             }
4120         }
4121         break;
4122
4123         default:
4124             mem = dest;
4125             fmt = surface->resource.format->glFormat;
4126             type = surface->resource.format->glType;
4127             bpp = surface->resource.format->byte_count;
4128     }
4129
4130     if (surface->flags & SFLAG_PBO)
4131     {
4132         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4133         checkGLcall("glBindBufferARB");
4134         if (mem)
4135         {
4136             ERR("mem not null for pbo -- unexpected\n");
4137             mem = NULL;
4138         }
4139     }
4140
4141     /* Save old pixel store pack state */
4142     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4143     checkGLcall("glGetIntegerv");
4144     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4145     checkGLcall("glGetIntegerv");
4146     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4147     checkGLcall("glGetIntegerv");
4148
4149     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4150     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4151     checkGLcall("glPixelStorei");
4152     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4153     checkGLcall("glPixelStorei");
4154     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4155     checkGLcall("glPixelStorei");
4156
4157     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4158             local_rect.right - local_rect.left,
4159             local_rect.bottom - local_rect.top,
4160             fmt, type, mem);
4161     checkGLcall("glReadPixels");
4162
4163     /* Reset previous pixel store pack state */
4164     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4165     checkGLcall("glPixelStorei");
4166     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4167     checkGLcall("glPixelStorei");
4168     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4169     checkGLcall("glPixelStorei");
4170
4171     if (surface->flags & SFLAG_PBO)
4172     {
4173         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4174         checkGLcall("glBindBufferARB");
4175
4176         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4177          * to get a pointer to it and perform the flipping in software. This is a lot
4178          * faster than calling glReadPixels for each line. In case we want more speed
4179          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4180         if (!srcIsUpsideDown)
4181         {
4182             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4183             checkGLcall("glBindBufferARB");
4184
4185             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4186             checkGLcall("glMapBufferARB");
4187         }
4188     }
4189
4190     /* TODO: Merge this with the palettization loop below for P8 targets */
4191     if(!srcIsUpsideDown) {
4192         UINT len, off;
4193         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4194             Flip the lines in software */
4195         len = (local_rect.right - local_rect.left) * bpp;
4196         off = local_rect.left * bpp;
4197
4198         row = HeapAlloc(GetProcessHeap(), 0, len);
4199         if(!row) {
4200             ERR("Out of memory\n");
4201             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4202                 HeapFree(GetProcessHeap(), 0, mem);
4203             LEAVE_GL();
4204             return;
4205         }
4206
4207         top = mem + pitch * local_rect.top;
4208         bottom = mem + pitch * (local_rect.bottom - 1);
4209         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4210             memcpy(row, top + off, len);
4211             memcpy(top + off, bottom + off, len);
4212             memcpy(bottom + off, row, len);
4213             top += pitch;
4214             bottom -= pitch;
4215         }
4216         HeapFree(GetProcessHeap(), 0, row);
4217
4218         /* Unmap the temp PBO buffer */
4219         if (surface->flags & SFLAG_PBO)
4220         {
4221             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4222             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4223         }
4224     }
4225
4226     LEAVE_GL();
4227     context_release(context);
4228
4229     /* For P8 textures we need to perform an inverse palette lookup. This is
4230      * done by searching for a palette index which matches the RGB value.
4231      * Note this isn't guaranteed to work when there are multiple entries for
4232      * the same color but we have no choice. In case of P8 render targets,
4233      * the index is stored in the alpha component so no conversion is needed. */
4234     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4235     {
4236         const PALETTEENTRY *pal = NULL;
4237         DWORD width = pitch / 3;
4238         int x, y, c;
4239
4240         if (surface->palette)
4241         {
4242             pal = surface->palette->palents;
4243         }
4244         else
4245         {
4246             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4247             HeapFree(GetProcessHeap(), 0, mem);
4248             return;
4249         }
4250
4251         for(y = local_rect.top; y < local_rect.bottom; y++) {
4252             for(x = local_rect.left; x < local_rect.right; x++) {
4253                 /*                      start              lines            pixels      */
4254                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4255                 const BYTE *green = blue  + 1;
4256                 const BYTE *red = green + 1;
4257
4258                 for(c = 0; c < 256; c++) {
4259                     if(*red   == pal[c].peRed   &&
4260                        *green == pal[c].peGreen &&
4261                        *blue  == pal[c].peBlue)
4262                     {
4263                         *((BYTE *) dest + y * width + x) = c;
4264                         break;
4265                     }
4266                 }
4267             }
4268         }
4269         HeapFree(GetProcessHeap(), 0, mem);
4270     }
4271 }
4272
4273 /* Read the framebuffer contents into a texture. Note that this function
4274  * doesn't do any kind of flipping. Using this on an onscreen surface will
4275  * result in a flipped D3D texture. */
4276 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4277 {
4278     struct wined3d_device *device = surface->resource.device;
4279     struct wined3d_context *context;
4280
4281     context = context_acquire(device, surface);
4282     device_invalidate_state(device, STATE_FRAMEBUFFER);
4283
4284     surface_prepare_texture(surface, context, srgb);
4285     surface_bind_and_dirtify(surface, context, srgb);
4286
4287     TRACE("Reading back offscreen render target %p.\n", surface);
4288
4289     ENTER_GL();
4290
4291     if (surface_is_offscreen(surface))
4292         glReadBuffer(device->offscreenBuffer);
4293     else
4294         glReadBuffer(surface_get_gl_buffer(surface));
4295     checkGLcall("glReadBuffer");
4296
4297     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4298             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4299     checkGLcall("glCopyTexSubImage2D");
4300
4301     LEAVE_GL();
4302
4303     context_release(context);
4304 }
4305
4306 /* Context activation is done by the caller. */
4307 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4308         struct wined3d_context *context, BOOL srgb)
4309 {
4310     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4311     enum wined3d_conversion_type convert;
4312     struct wined3d_format format;
4313
4314     if (surface->flags & alloc_flag) return;
4315
4316     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4317     if (convert != WINED3D_CT_NONE || format.convert)
4318         surface->flags |= SFLAG_CONVERTED;
4319     else surface->flags &= ~SFLAG_CONVERTED;
4320
4321     surface_bind_and_dirtify(surface, context, srgb);
4322     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4323     surface->flags |= alloc_flag;
4324 }
4325
4326 /* Context activation is done by the caller. */
4327 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4328 {
4329     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4330     {
4331         struct wined3d_texture *texture = surface->container.u.texture;
4332         UINT sub_count = texture->level_count * texture->layer_count;
4333         UINT i;
4334
4335         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4336
4337         for (i = 0; i < sub_count; ++i)
4338         {
4339             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4340             surface_prepare_texture_internal(s, context, srgb);
4341         }
4342
4343         return;
4344     }
4345
4346     surface_prepare_texture_internal(surface, context, srgb);
4347 }
4348
4349 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4350 {
4351     if (multisample)
4352     {
4353         if (surface->rb_multisample)
4354             return;
4355
4356         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4357         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4358         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4359                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4360         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4361     }
4362     else
4363     {
4364         if (surface->rb_resolved)
4365             return;
4366
4367         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4368         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4369         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4370                 surface->pow2Width, surface->pow2Height);
4371         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4372     }
4373 }
4374
4375 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4376         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4377 {
4378     struct wined3d_device *device = surface->resource.device;
4379     UINT pitch = wined3d_surface_get_pitch(surface);
4380     const struct wined3d_gl_info *gl_info;
4381     struct wined3d_context *context;
4382     RECT local_rect;
4383     UINT w, h;
4384
4385     surface_get_rect(surface, rect, &local_rect);
4386
4387     mem += local_rect.top * pitch + local_rect.left * bpp;
4388     w = local_rect.right - local_rect.left;
4389     h = local_rect.bottom - local_rect.top;
4390
4391     /* Activate the correct context for the render target */
4392     context = context_acquire(device, surface);
4393     context_apply_blit_state(context, device);
4394     gl_info = context->gl_info;
4395
4396     ENTER_GL();
4397
4398     if (!surface_is_offscreen(surface))
4399     {
4400         GLenum buffer = surface_get_gl_buffer(surface);
4401         TRACE("Unlocking %#x buffer.\n", buffer);
4402         context_set_draw_buffer(context, buffer);
4403
4404         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4405         glPixelZoom(1.0f, -1.0f);
4406     }
4407     else
4408     {
4409         /* Primary offscreen render target */
4410         TRACE("Offscreen render target.\n");
4411         context_set_draw_buffer(context, device->offscreenBuffer);
4412
4413         glPixelZoom(1.0f, 1.0f);
4414     }
4415
4416     glRasterPos3i(local_rect.left, local_rect.top, 1);
4417     checkGLcall("glRasterPos3i");
4418
4419     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4420     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4421
4422     if (surface->flags & SFLAG_PBO)
4423     {
4424         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4425         checkGLcall("glBindBufferARB");
4426     }
4427
4428     glDrawPixels(w, h, fmt, type, mem);
4429     checkGLcall("glDrawPixels");
4430
4431     if (surface->flags & SFLAG_PBO)
4432     {
4433         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4434         checkGLcall("glBindBufferARB");
4435     }
4436
4437     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4438     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4439
4440     LEAVE_GL();
4441
4442     if (wined3d_settings.strict_draw_ordering
4443             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4444             && surface->container.u.swapchain->front_buffer == surface))
4445         wglFlush();
4446
4447     context_release(context);
4448 }
4449
4450 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck, BOOL use_texturing,
4451         struct wined3d_format *format, enum wined3d_conversion_type *conversion_type)
4452 {
4453     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4454     const struct wined3d_device *device = surface->resource.device;
4455     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4456     BOOL blit_supported = FALSE;
4457
4458     /* Copy the default values from the surface. Below we might perform fixups */
4459     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4460     *format = *surface->resource.format;
4461     *conversion_type = WINED3D_CT_NONE;
4462
4463     /* Ok, now look if we have to do any conversion */
4464     switch (surface->resource.format->id)
4465     {
4466         case WINED3DFMT_P8_UINT:
4467             /* Below the call to blit_supported is disabled for Wine 1.2
4468              * because the function isn't operating correctly yet. At the
4469              * moment 8-bit blits are handled in software and if certain GL
4470              * extensions are around, surface conversion is performed at
4471              * upload time. The blit_supported call recognizes it as a
4472              * destination fixup. This type of upload 'fixup' and 8-bit to
4473              * 8-bit blits need to be handled by the blit_shader.
4474              * TODO: get rid of this #if 0. */
4475 #if 0
4476             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4477                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4478                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4479 #endif
4480             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4481
4482             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4483              * texturing. Further also use conversion in case of color keying.
4484              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4485              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4486              * conflicts with this.
4487              */
4488             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4489                     || colorkey_active || !use_texturing)
4490             {
4491                 format->glFormat = GL_RGBA;
4492                 format->glInternal = GL_RGBA;
4493                 format->glType = GL_UNSIGNED_BYTE;
4494                 format->conv_byte_count = 4;
4495                 if (colorkey_active)
4496                     *conversion_type = WINED3D_CT_PALETTED_CK;
4497                 else
4498                     *conversion_type = WINED3D_CT_PALETTED;
4499             }
4500             break;
4501
4502         case WINED3DFMT_B2G3R3_UNORM:
4503             /* **********************
4504                 GL_UNSIGNED_BYTE_3_3_2
4505                 ********************** */
4506             if (colorkey_active) {
4507                 /* This texture format will never be used.. So do not care about color keying
4508                     up until the point in time it will be needed :-) */
4509                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4510             }
4511             break;
4512
4513         case WINED3DFMT_B5G6R5_UNORM:
4514             if (colorkey_active)
4515             {
4516                 *conversion_type = WINED3D_CT_CK_565;
4517                 format->glFormat = GL_RGBA;
4518                 format->glInternal = GL_RGB5_A1;
4519                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4520                 format->conv_byte_count = 2;
4521             }
4522             break;
4523
4524         case WINED3DFMT_B5G5R5X1_UNORM:
4525             if (colorkey_active)
4526             {
4527                 *conversion_type = WINED3D_CT_CK_5551;
4528                 format->glFormat = GL_BGRA;
4529                 format->glInternal = GL_RGB5_A1;
4530                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4531                 format->conv_byte_count = 2;
4532             }
4533             break;
4534
4535         case WINED3DFMT_B8G8R8_UNORM:
4536             if (colorkey_active)
4537             {
4538                 *conversion_type = WINED3D_CT_CK_RGB24;
4539                 format->glFormat = GL_RGBA;
4540                 format->glInternal = GL_RGBA8;
4541                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4542                 format->conv_byte_count = 4;
4543             }
4544             break;
4545
4546         case WINED3DFMT_B8G8R8X8_UNORM:
4547             if (colorkey_active)
4548             {
4549                 *conversion_type = WINED3D_CT_RGB32_888;
4550                 format->glFormat = GL_RGBA;
4551                 format->glInternal = GL_RGBA8;
4552                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4553                 format->conv_byte_count = 4;
4554             }
4555             break;
4556
4557         case WINED3DFMT_B8G8R8A8_UNORM:
4558             if (colorkey_active)
4559             {
4560                 *conversion_type = WINED3D_CT_CK_ARGB32;
4561                 format->conv_byte_count = 4;
4562             }
4563             break;
4564
4565         default:
4566             break;
4567     }
4568
4569     if (*conversion_type != WINED3D_CT_NONE)
4570     {
4571         format->rtInternal = format->glInternal;
4572         format->glGammaInternal = format->glInternal;
4573     }
4574
4575     return WINED3D_OK;
4576 }
4577
4578 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4579 {
4580     /* FIXME: Is this really how color keys are supposed to work? I think it
4581      * makes more sense to compare the individual channels. */
4582     return color >= color_key->color_space_low_value
4583             && color <= color_key->color_space_high_value;
4584 }
4585
4586 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4587 {
4588     const struct wined3d_device *device = surface->resource.device;
4589     const struct wined3d_palette *pal = surface->palette;
4590     BOOL index_in_alpha = FALSE;
4591     unsigned int i;
4592
4593     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4594      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4595      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4596      * duplicate entries. Store the color key in the unused alpha component to speed the
4597      * download up and to make conversion unneeded. */
4598     index_in_alpha = primary_render_target_is_p8(device);
4599
4600     if (!pal)
4601     {
4602         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4603         if (index_in_alpha)
4604         {
4605             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4606              * there's no palette at this time. */
4607             for (i = 0; i < 256; i++) table[i][3] = i;
4608         }
4609     }
4610     else
4611     {
4612         TRACE("Using surface palette %p\n", pal);
4613         /* Get the surface's palette */
4614         for (i = 0; i < 256; ++i)
4615         {
4616             table[i][0] = pal->palents[i].peRed;
4617             table[i][1] = pal->palents[i].peGreen;
4618             table[i][2] = pal->palents[i].peBlue;
4619
4620             /* When index_in_alpha is set the palette index is stored in the
4621              * alpha component. In case of a readback we can then read
4622              * GL_ALPHA. Color keying is handled in BltOverride using a
4623              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4624              * color key itself is passed to glAlphaFunc in other cases the
4625              * alpha component of pixels that should be masked away is set to 0. */
4626             if (index_in_alpha)
4627                 table[i][3] = i;
4628             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4629                 table[i][3] = 0x00;
4630             else if (pal->flags & WINEDDPCAPS_ALPHA)
4631                 table[i][3] = pal->palents[i].peFlags;
4632             else
4633                 table[i][3] = 0xFF;
4634         }
4635     }
4636 }
4637
4638 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width, UINT height,
4639         UINT outpitch, enum wined3d_conversion_type conversion_type, struct wined3d_surface *surface)
4640 {
4641     const BYTE *source;
4642     BYTE *dest;
4643
4644     TRACE("src %p, dst %p, pitch %u, width %u, height %u, outpitch %u, conversion_type %#x, surface %p.\n",
4645             src, dst, pitch, width, height, outpitch, conversion_type, surface);
4646
4647     switch (conversion_type)
4648     {
4649         case WINED3D_CT_NONE:
4650         {
4651             memcpy(dst, src, pitch * height);
4652             break;
4653         }
4654
4655         case WINED3D_CT_PALETTED:
4656         case WINED3D_CT_PALETTED_CK:
4657         {
4658             BYTE table[256][4];
4659             unsigned int x, y;
4660
4661             d3dfmt_p8_init_palette(surface, table, (conversion_type == WINED3D_CT_PALETTED_CK));
4662
4663             for (y = 0; y < height; y++)
4664             {
4665                 source = src + pitch * y;
4666                 dest = dst + outpitch * y;
4667                 /* This is an 1 bpp format, using the width here is fine */
4668                 for (x = 0; x < width; x++) {
4669                     BYTE color = *source++;
4670                     *dest++ = table[color][0];
4671                     *dest++ = table[color][1];
4672                     *dest++ = table[color][2];
4673                     *dest++ = table[color][3];
4674                 }
4675             }
4676         }
4677         break;
4678
4679         case WINED3D_CT_CK_565:
4680         {
4681             /* Converting the 565 format in 5551 packed to emulate color-keying.
4682
4683               Note : in all these conversion, it would be best to average the averaging
4684                       pixels to get the color of the pixel that will be color-keyed to
4685                       prevent 'color bleeding'. This will be done later on if ever it is
4686                       too visible.
4687
4688               Note2: Nvidia documents say that their driver does not support alpha + color keying
4689                      on the same surface and disables color keying in such a case
4690             */
4691             unsigned int x, y;
4692             const WORD *Source;
4693             WORD *Dest;
4694
4695             TRACE("Color keyed 565\n");
4696
4697             for (y = 0; y < height; y++) {
4698                 Source = (const WORD *)(src + y * pitch);
4699                 Dest = (WORD *) (dst + y * outpitch);
4700                 for (x = 0; x < width; x++ ) {
4701                     WORD color = *Source++;
4702                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4703                     if (!color_in_range(&surface->src_blt_color_key, color))
4704                         *Dest |= 0x0001;
4705                     Dest++;
4706                 }
4707             }
4708         }
4709         break;
4710
4711         case WINED3D_CT_CK_5551:
4712         {
4713             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4714             unsigned int x, y;
4715             const WORD *Source;
4716             WORD *Dest;
4717             TRACE("Color keyed 5551\n");
4718             for (y = 0; y < height; y++) {
4719                 Source = (const WORD *)(src + y * pitch);
4720                 Dest = (WORD *) (dst + y * outpitch);
4721                 for (x = 0; x < width; x++ ) {
4722                     WORD color = *Source++;
4723                     *Dest = color;
4724                     if (!color_in_range(&surface->src_blt_color_key, color))
4725                         *Dest |= (1 << 15);
4726                     else
4727                         *Dest &= ~(1 << 15);
4728                     Dest++;
4729                 }
4730             }
4731         }
4732         break;
4733
4734         case WINED3D_CT_CK_RGB24:
4735         {
4736             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4737             unsigned int x, y;
4738             for (y = 0; y < height; y++)
4739             {
4740                 source = src + pitch * y;
4741                 dest = dst + outpitch * y;
4742                 for (x = 0; x < width; x++) {
4743                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4744                     DWORD dstcolor = color << 8;
4745                     if (!color_in_range(&surface->src_blt_color_key, color))
4746                         dstcolor |= 0xff;
4747                     *(DWORD*)dest = dstcolor;
4748                     source += 3;
4749                     dest += 4;
4750                 }
4751             }
4752         }
4753         break;
4754
4755         case WINED3D_CT_RGB32_888:
4756         {
4757             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4758             unsigned int x, y;
4759             for (y = 0; y < height; y++)
4760             {
4761                 source = src + pitch * y;
4762                 dest = dst + outpitch * y;
4763                 for (x = 0; x < width; x++) {
4764                     DWORD color = 0xffffff & *(const DWORD*)source;
4765                     DWORD dstcolor = color << 8;
4766                     if (!color_in_range(&surface->src_blt_color_key, color))
4767                         dstcolor |= 0xff;
4768                     *(DWORD*)dest = dstcolor;
4769                     source += 4;
4770                     dest += 4;
4771                 }
4772             }
4773         }
4774         break;
4775
4776         case WINED3D_CT_CK_ARGB32:
4777         {
4778             unsigned int x, y;
4779             for (y = 0; y < height; ++y)
4780             {
4781                 source = src + pitch * y;
4782                 dest = dst + outpitch * y;
4783                 for (x = 0; x < width; ++x)
4784                 {
4785                     DWORD color = *(const DWORD *)source;
4786                     if (color_in_range(&surface->src_blt_color_key, color))
4787                         color &= ~0xff000000;
4788                     *(DWORD*)dest = color;
4789                     source += 4;
4790                     dest += 4;
4791                 }
4792             }
4793         }
4794         break;
4795
4796         default:
4797             ERR("Unsupported conversion type %#x.\n", conversion_type);
4798     }
4799     return WINED3D_OK;
4800 }
4801
4802 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4803 {
4804     /* Flip the surface contents */
4805     /* Flip the DC */
4806     {
4807         HDC tmp;
4808         tmp = front->hDC;
4809         front->hDC = back->hDC;
4810         back->hDC = tmp;
4811     }
4812
4813     /* Flip the DIBsection */
4814     {
4815         HBITMAP tmp = front->dib.DIBsection;
4816         front->dib.DIBsection = back->dib.DIBsection;
4817         back->dib.DIBsection = tmp;
4818     }
4819
4820     /* Flip the surface data */
4821     {
4822         void* tmp;
4823
4824         tmp = front->dib.bitmap_data;
4825         front->dib.bitmap_data = back->dib.bitmap_data;
4826         back->dib.bitmap_data = tmp;
4827
4828         tmp = front->resource.allocatedMemory;
4829         front->resource.allocatedMemory = back->resource.allocatedMemory;
4830         back->resource.allocatedMemory = tmp;
4831
4832         tmp = front->resource.heapMemory;
4833         front->resource.heapMemory = back->resource.heapMemory;
4834         back->resource.heapMemory = tmp;
4835     }
4836
4837     /* Flip the PBO */
4838     {
4839         GLuint tmp_pbo = front->pbo;
4840         front->pbo = back->pbo;
4841         back->pbo = tmp_pbo;
4842     }
4843
4844     /* Flip the opengl texture */
4845     {
4846         GLuint tmp;
4847
4848         tmp = back->texture_name;
4849         back->texture_name = front->texture_name;
4850         front->texture_name = tmp;
4851
4852         tmp = back->texture_name_srgb;
4853         back->texture_name_srgb = front->texture_name_srgb;
4854         front->texture_name_srgb = tmp;
4855
4856         tmp = back->rb_multisample;
4857         back->rb_multisample = front->rb_multisample;
4858         front->rb_multisample = tmp;
4859
4860         tmp = back->rb_resolved;
4861         back->rb_resolved = front->rb_resolved;
4862         front->rb_resolved = tmp;
4863
4864         resource_unload(&back->resource);
4865         resource_unload(&front->resource);
4866     }
4867
4868     {
4869         DWORD tmp_flags = back->flags;
4870         back->flags = front->flags;
4871         front->flags = tmp_flags;
4872     }
4873 }
4874
4875 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4876  * pixel copy calls. */
4877 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4878         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4879 {
4880     struct wined3d_device *device = dst_surface->resource.device;
4881     float xrel, yrel;
4882     UINT row;
4883     struct wined3d_context *context;
4884     BOOL upsidedown = FALSE;
4885     RECT dst_rect = *dst_rect_in;
4886
4887     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4888      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4889      */
4890     if(dst_rect.top > dst_rect.bottom) {
4891         UINT tmp = dst_rect.bottom;
4892         dst_rect.bottom = dst_rect.top;
4893         dst_rect.top = tmp;
4894         upsidedown = TRUE;
4895     }
4896
4897     context = context_acquire(device, src_surface);
4898     context_apply_blit_state(context, device);
4899     surface_internal_preload(dst_surface, SRGB_RGB);
4900     ENTER_GL();
4901
4902     /* Bind the target texture */
4903     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4904     if (surface_is_offscreen(src_surface))
4905     {
4906         TRACE("Reading from an offscreen target\n");
4907         upsidedown = !upsidedown;
4908         glReadBuffer(device->offscreenBuffer);
4909     }
4910     else
4911     {
4912         glReadBuffer(surface_get_gl_buffer(src_surface));
4913     }
4914     checkGLcall("glReadBuffer");
4915
4916     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4917     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4918
4919     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4920     {
4921         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4922
4923         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4924             ERR("Texture filtering not supported in direct blit.\n");
4925     }
4926     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4927             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4928     {
4929         ERR("Texture filtering not supported in direct blit\n");
4930     }
4931
4932     if (upsidedown
4933             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4934             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4935     {
4936         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4937
4938         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4939                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4940                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4941                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4942     }
4943     else
4944     {
4945         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4946         /* I have to process this row by row to swap the image,
4947          * otherwise it would be upside down, so stretching in y direction
4948          * doesn't cost extra time
4949          *
4950          * However, stretching in x direction can be avoided if not necessary
4951          */
4952         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4953             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4954             {
4955                 /* Well, that stuff works, but it's very slow.
4956                  * find a better way instead
4957                  */
4958                 UINT col;
4959
4960                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4961                 {
4962                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4963                             dst_rect.left + col /* x offset */, row /* y offset */,
4964                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4965                 }
4966             }
4967             else
4968             {
4969                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4970                         dst_rect.left /* x offset */, row /* y offset */,
4971                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4972             }
4973         }
4974     }
4975     checkGLcall("glCopyTexSubImage2D");
4976
4977     LEAVE_GL();
4978     context_release(context);
4979
4980     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4981      * path is never entered
4982      */
4983     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4984 }
4985
4986 /* Uses the hardware to stretch and flip the image */
4987 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4988         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4989 {
4990     struct wined3d_device *device = dst_surface->resource.device;
4991     struct wined3d_swapchain *src_swapchain = NULL;
4992     GLuint src, backup = 0;
4993     float left, right, top, bottom; /* Texture coordinates */
4994     UINT fbwidth = src_surface->resource.width;
4995     UINT fbheight = src_surface->resource.height;
4996     struct wined3d_context *context;
4997     GLenum drawBuffer = GL_BACK;
4998     GLenum texture_target;
4999     BOOL noBackBufferBackup;
5000     BOOL src_offscreen;
5001     BOOL upsidedown = FALSE;
5002     RECT dst_rect = *dst_rect_in;
5003
5004     TRACE("Using hwstretch blit\n");
5005     /* Activate the Proper context for reading from the source surface, set it up for blitting */
5006     context = context_acquire(device, src_surface);
5007     context_apply_blit_state(context, device);
5008     surface_internal_preload(dst_surface, SRGB_RGB);
5009
5010     src_offscreen = surface_is_offscreen(src_surface);
5011     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
5012     if (!noBackBufferBackup && !src_surface->texture_name)
5013     {
5014         /* Get it a description */
5015         surface_internal_preload(src_surface, SRGB_RGB);
5016     }
5017     ENTER_GL();
5018
5019     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
5020      * This way we don't have to wait for the 2nd readback to finish to leave this function.
5021      */
5022     if (context->aux_buffers >= 2)
5023     {
5024         /* Got more than one aux buffer? Use the 2nd aux buffer */
5025         drawBuffer = GL_AUX1;
5026     }
5027     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
5028     {
5029         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5030         drawBuffer = GL_AUX0;
5031     }
5032
5033     if(noBackBufferBackup) {
5034         glGenTextures(1, &backup);
5035         checkGLcall("glGenTextures");
5036         context_bind_texture(context, GL_TEXTURE_2D, backup);
5037         texture_target = GL_TEXTURE_2D;
5038     } else {
5039         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5040          * we are reading from the back buffer, the backup can be used as source texture
5041          */
5042         texture_target = src_surface->texture_target;
5043         context_bind_texture(context, texture_target, src_surface->texture_name);
5044         glEnable(texture_target);
5045         checkGLcall("glEnable(texture_target)");
5046
5047         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5048         src_surface->flags &= ~SFLAG_INTEXTURE;
5049     }
5050
5051     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5052      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5053      */
5054     if(dst_rect.top > dst_rect.bottom) {
5055         UINT tmp = dst_rect.bottom;
5056         dst_rect.bottom = dst_rect.top;
5057         dst_rect.top = tmp;
5058         upsidedown = TRUE;
5059     }
5060
5061     if (src_offscreen)
5062     {
5063         TRACE("Reading from an offscreen target\n");
5064         upsidedown = !upsidedown;
5065         glReadBuffer(device->offscreenBuffer);
5066     }
5067     else
5068     {
5069         glReadBuffer(surface_get_gl_buffer(src_surface));
5070     }
5071
5072     /* TODO: Only back up the part that will be overwritten */
5073     glCopyTexSubImage2D(texture_target, 0,
5074                         0, 0 /* read offsets */,
5075                         0, 0,
5076                         fbwidth,
5077                         fbheight);
5078
5079     checkGLcall("glCopyTexSubImage2D");
5080
5081     /* No issue with overriding these - the sampler is dirty due to blit usage */
5082     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5083             wined3d_gl_mag_filter(magLookup, filter));
5084     checkGLcall("glTexParameteri");
5085     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5086             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5087     checkGLcall("glTexParameteri");
5088
5089     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5090         src_swapchain = src_surface->container.u.swapchain;
5091     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5092     {
5093         src = backup ? backup : src_surface->texture_name;
5094     }
5095     else
5096     {
5097         glReadBuffer(GL_FRONT);
5098         checkGLcall("glReadBuffer(GL_FRONT)");
5099
5100         glGenTextures(1, &src);
5101         checkGLcall("glGenTextures(1, &src)");
5102         context_bind_texture(context, GL_TEXTURE_2D, src);
5103
5104         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5105          * out for power of 2 sizes
5106          */
5107         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5108                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5109         checkGLcall("glTexImage2D");
5110         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5111                             0, 0 /* read offsets */,
5112                             0, 0,
5113                             fbwidth,
5114                             fbheight);
5115
5116         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5117         checkGLcall("glTexParameteri");
5118         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5119         checkGLcall("glTexParameteri");
5120
5121         glReadBuffer(GL_BACK);
5122         checkGLcall("glReadBuffer(GL_BACK)");
5123
5124         if(texture_target != GL_TEXTURE_2D) {
5125             glDisable(texture_target);
5126             glEnable(GL_TEXTURE_2D);
5127             texture_target = GL_TEXTURE_2D;
5128         }
5129     }
5130     checkGLcall("glEnd and previous");
5131
5132     left = src_rect->left;
5133     right = src_rect->right;
5134
5135     if (!upsidedown)
5136     {
5137         top = src_surface->resource.height - src_rect->top;
5138         bottom = src_surface->resource.height - src_rect->bottom;
5139     }
5140     else
5141     {
5142         top = src_surface->resource.height - src_rect->bottom;
5143         bottom = src_surface->resource.height - src_rect->top;
5144     }
5145
5146     if (src_surface->flags & SFLAG_NORMCOORD)
5147     {
5148         left /= src_surface->pow2Width;
5149         right /= src_surface->pow2Width;
5150         top /= src_surface->pow2Height;
5151         bottom /= src_surface->pow2Height;
5152     }
5153
5154     /* draw the source texture stretched and upside down. The correct surface is bound already */
5155     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5156     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5157
5158     context_set_draw_buffer(context, drawBuffer);
5159     glReadBuffer(drawBuffer);
5160
5161     glBegin(GL_QUADS);
5162         /* bottom left */
5163         glTexCoord2f(left, bottom);
5164         glVertex2i(0, 0);
5165
5166         /* top left */
5167         glTexCoord2f(left, top);
5168         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5169
5170         /* top right */
5171         glTexCoord2f(right, top);
5172         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5173
5174         /* bottom right */
5175         glTexCoord2f(right, bottom);
5176         glVertex2i(dst_rect.right - dst_rect.left, 0);
5177     glEnd();
5178     checkGLcall("glEnd and previous");
5179
5180     if (texture_target != dst_surface->texture_target)
5181     {
5182         glDisable(texture_target);
5183         glEnable(dst_surface->texture_target);
5184         texture_target = dst_surface->texture_target;
5185     }
5186
5187     /* Now read the stretched and upside down image into the destination texture */
5188     context_bind_texture(context, texture_target, dst_surface->texture_name);
5189     glCopyTexSubImage2D(texture_target,
5190                         0,
5191                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5192                         0, 0, /* We blitted the image to the origin */
5193                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5194     checkGLcall("glCopyTexSubImage2D");
5195
5196     if(drawBuffer == GL_BACK) {
5197         /* Write the back buffer backup back */
5198         if(backup) {
5199             if(texture_target != GL_TEXTURE_2D) {
5200                 glDisable(texture_target);
5201                 glEnable(GL_TEXTURE_2D);
5202                 texture_target = GL_TEXTURE_2D;
5203             }
5204             context_bind_texture(context, GL_TEXTURE_2D, backup);
5205         }
5206         else
5207         {
5208             if (texture_target != src_surface->texture_target)
5209             {
5210                 glDisable(texture_target);
5211                 glEnable(src_surface->texture_target);
5212                 texture_target = src_surface->texture_target;
5213             }
5214             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5215         }
5216
5217         glBegin(GL_QUADS);
5218             /* top left */
5219             glTexCoord2f(0.0f, 0.0f);
5220             glVertex2i(0, fbheight);
5221
5222             /* bottom left */
5223             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5224             glVertex2i(0, 0);
5225
5226             /* bottom right */
5227             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5228                     (float)fbheight / (float)src_surface->pow2Height);
5229             glVertex2i(fbwidth, 0);
5230
5231             /* top right */
5232             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5233             glVertex2i(fbwidth, fbheight);
5234         glEnd();
5235     }
5236     glDisable(texture_target);
5237     checkGLcall("glDisable(texture_target)");
5238
5239     /* Cleanup */
5240     if (src != src_surface->texture_name && src != backup)
5241     {
5242         glDeleteTextures(1, &src);
5243         checkGLcall("glDeleteTextures(1, &src)");
5244     }
5245     if(backup) {
5246         glDeleteTextures(1, &backup);
5247         checkGLcall("glDeleteTextures(1, &backup)");
5248     }
5249
5250     LEAVE_GL();
5251
5252     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5253
5254     context_release(context);
5255
5256     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5257      * path is never entered
5258      */
5259     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5260 }
5261
5262 /* Front buffer coordinates are always full screen coordinates, but our GL
5263  * drawable is limited to the window's client area. The sysmem and texture
5264  * copies do have the full screen size. Note that GL has a bottom-left
5265  * origin, while D3D has a top-left origin. */
5266 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5267 {
5268     UINT drawable_height;
5269
5270     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5271             && surface == surface->container.u.swapchain->front_buffer)
5272     {
5273         POINT offset = {0, 0};
5274         RECT windowsize;
5275
5276         ScreenToClient(window, &offset);
5277         OffsetRect(rect, offset.x, offset.y);
5278
5279         GetClientRect(window, &windowsize);
5280         drawable_height = windowsize.bottom - windowsize.top;
5281     }
5282     else
5283     {
5284         drawable_height = surface->resource.height;
5285     }
5286
5287     rect->top = drawable_height - rect->top;
5288     rect->bottom = drawable_height - rect->bottom;
5289 }
5290
5291 static void surface_blt_to_drawable(const struct wined3d_device *device,
5292         enum wined3d_texture_filter_type filter, BOOL color_key,
5293         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5294         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5295 {
5296     struct wined3d_context *context;
5297     RECT src_rect, dst_rect;
5298
5299     src_rect = *src_rect_in;
5300     dst_rect = *dst_rect_in;
5301
5302     /* Make sure the surface is up-to-date. This should probably use
5303      * surface_load_location() and worry about the destination surface too,
5304      * unless we're overwriting it completely. */
5305     surface_internal_preload(src_surface, SRGB_RGB);
5306
5307     /* Activate the destination context, set it up for blitting */
5308     context = context_acquire(device, dst_surface);
5309     context_apply_blit_state(context, device);
5310
5311     if (!surface_is_offscreen(dst_surface))
5312         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5313
5314     device->blitter->set_shader(device->blit_priv, context, src_surface);
5315
5316     ENTER_GL();
5317
5318     if (color_key)
5319     {
5320         glEnable(GL_ALPHA_TEST);
5321         checkGLcall("glEnable(GL_ALPHA_TEST)");
5322
5323         /* When the primary render target uses P8, the alpha component
5324          * contains the palette index. Which means that the colorkey is one of
5325          * the palette entries. In other cases pixels that should be masked
5326          * away have alpha set to 0. */
5327         if (primary_render_target_is_p8(device))
5328             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5329         else
5330             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5331         checkGLcall("glAlphaFunc");
5332     }
5333     else
5334     {
5335         glDisable(GL_ALPHA_TEST);
5336         checkGLcall("glDisable(GL_ALPHA_TEST)");
5337     }
5338
5339     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5340
5341     if (color_key)
5342     {
5343         glDisable(GL_ALPHA_TEST);
5344         checkGLcall("glDisable(GL_ALPHA_TEST)");
5345     }
5346
5347     LEAVE_GL();
5348
5349     /* Leave the opengl state valid for blitting */
5350     device->blitter->unset_shader(context->gl_info);
5351
5352     if (wined3d_settings.strict_draw_ordering
5353             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5354             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5355         wglFlush(); /* Flush to ensure ordering across contexts. */
5356
5357     context_release(context);
5358 }
5359
5360 /* Do not call while under the GL lock. */
5361 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5362 {
5363     struct wined3d_device *device = s->resource.device;
5364     const struct blit_shader *blitter;
5365
5366     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5367             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5368     if (!blitter)
5369     {
5370         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5371         return WINED3DERR_INVALIDCALL;
5372     }
5373
5374     return blitter->color_fill(device, s, rect, color);
5375 }
5376
5377 /* Do not call while under the GL lock. */
5378 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5379         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5380         enum wined3d_texture_filter_type filter)
5381 {
5382     struct wined3d_device *device = dst_surface->resource.device;
5383     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5384     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5385
5386     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5387             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5388             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5389
5390     /* Get the swapchain. One of the surfaces has to be a primary surface */
5391     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5392     {
5393         WARN("Destination is in sysmem, rejecting gl blt\n");
5394         return WINED3DERR_INVALIDCALL;
5395     }
5396
5397     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5398         dstSwapchain = dst_surface->container.u.swapchain;
5399
5400     if (src_surface)
5401     {
5402         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5403         {
5404             WARN("Src is in sysmem, rejecting gl blt\n");
5405             return WINED3DERR_INVALIDCALL;
5406         }
5407
5408         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5409             srcSwapchain = src_surface->container.u.swapchain;
5410     }
5411
5412     /* Early sort out of cases where no render target is used */
5413     if (!dstSwapchain && !srcSwapchain
5414             && src_surface != device->fb.render_targets[0]
5415             && dst_surface != device->fb.render_targets[0])
5416     {
5417         TRACE("No surface is render target, not using hardware blit.\n");
5418         return WINED3DERR_INVALIDCALL;
5419     }
5420
5421     /* No destination color keying supported */
5422     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5423     {
5424         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5425         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5426         return WINED3DERR_INVALIDCALL;
5427     }
5428
5429     if (dstSwapchain && dstSwapchain == srcSwapchain)
5430     {
5431         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5432         return WINED3DERR_INVALIDCALL;
5433     }
5434
5435     if (dstSwapchain && srcSwapchain)
5436     {
5437         FIXME("Implement hardware blit between two different swapchains\n");
5438         return WINED3DERR_INVALIDCALL;
5439     }
5440
5441     if (dstSwapchain)
5442     {
5443         /* Handled with regular texture -> swapchain blit */
5444         if (src_surface == device->fb.render_targets[0])
5445             TRACE("Blit from active render target to a swapchain\n");
5446     }
5447     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5448     {
5449         FIXME("Implement blit from a swapchain to the active render target\n");
5450         return WINED3DERR_INVALIDCALL;
5451     }
5452
5453     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5454     {
5455         /* Blit from render target to texture */
5456         BOOL stretchx;
5457
5458         /* P8 read back is not implemented */
5459         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5460                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5461         {
5462             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5463             return WINED3DERR_INVALIDCALL;
5464         }
5465
5466         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5467         {
5468             TRACE("Color keying not supported by frame buffer to texture blit\n");
5469             return WINED3DERR_INVALIDCALL;
5470             /* Destination color key is checked above */
5471         }
5472
5473         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5474             stretchx = TRUE;
5475         else
5476             stretchx = FALSE;
5477
5478         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5479          * flip the image nor scale it.
5480          *
5481          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5482          * -> If the app wants a image width an unscaled width, copy it line per line
5483          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5484          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5485          *    back buffer. This is slower than reading line per line, thus not used for flipping
5486          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5487          *    pixel by pixel. */
5488         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5489                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5490         {
5491             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5492             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5493         }
5494         else
5495         {
5496             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5497             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5498         }
5499
5500         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5501         {
5502             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5503             dst_surface->resource.allocatedMemory = NULL;
5504             dst_surface->resource.heapMemory = NULL;
5505         }
5506         else
5507         {
5508             dst_surface->flags &= ~SFLAG_INSYSMEM;
5509         }
5510
5511         return WINED3D_OK;
5512     }
5513     else if (src_surface)
5514     {
5515         /* Blit from offscreen surface to render target */
5516         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5517         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5518
5519         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5520
5521         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5522                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5523                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5524         {
5525             FIXME("Unsupported blit operation falling back to software\n");
5526             return WINED3DERR_INVALIDCALL;
5527         }
5528
5529         /* Color keying: Check if we have to do a color keyed blt,
5530          * and if not check if a color key is activated.
5531          *
5532          * Just modify the color keying parameters in the surface and restore them afterwards
5533          * The surface keeps track of the color key last used to load the opengl surface.
5534          * PreLoad will catch the change to the flags and color key and reload if necessary.
5535          */
5536         if (flags & WINEDDBLT_KEYSRC)
5537         {
5538             /* Use color key from surface */
5539         }
5540         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5541         {
5542             /* Use color key from DDBltFx */
5543             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5544             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5545         }
5546         else
5547         {
5548             /* Do not use color key */
5549             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5550         }
5551
5552         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5553                 src_surface, src_rect, dst_surface, dst_rect);
5554
5555         /* Restore the color key parameters */
5556         src_surface->CKeyFlags = oldCKeyFlags;
5557         src_surface->src_blt_color_key = old_blt_key;
5558
5559         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5560
5561         return WINED3D_OK;
5562     }
5563
5564     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5565     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5566     return WINED3DERR_INVALIDCALL;
5567 }
5568
5569 /* GL locking is done by the caller */
5570 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5571         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5572 {
5573     struct wined3d_device *device = surface->resource.device;
5574     const struct wined3d_gl_info *gl_info = context->gl_info;
5575     GLint compare_mode = GL_NONE;
5576     struct blt_info info;
5577     GLint old_binding = 0;
5578     RECT rect;
5579
5580     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5581
5582     glDisable(GL_CULL_FACE);
5583     glDisable(GL_BLEND);
5584     glDisable(GL_ALPHA_TEST);
5585     glDisable(GL_SCISSOR_TEST);
5586     glDisable(GL_STENCIL_TEST);
5587     glEnable(GL_DEPTH_TEST);
5588     glDepthFunc(GL_ALWAYS);
5589     glDepthMask(GL_TRUE);
5590     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5591     glViewport(x, y, w, h);
5592     glDepthRange(0.0, 1.0);
5593
5594     SetRect(&rect, 0, h, w, 0);
5595     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5596     context_active_texture(context, context->gl_info, 0);
5597     glGetIntegerv(info.binding, &old_binding);
5598     glBindTexture(info.bind_target, texture);
5599     if (gl_info->supported[ARB_SHADOW])
5600     {
5601         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5602         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5603     }
5604
5605     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5606             gl_info, info.tex_type, &surface->ds_current_size);
5607
5608     glBegin(GL_TRIANGLE_STRIP);
5609     glTexCoord3fv(info.coords[0]);
5610     glVertex2f(-1.0f, -1.0f);
5611     glTexCoord3fv(info.coords[1]);
5612     glVertex2f(1.0f, -1.0f);
5613     glTexCoord3fv(info.coords[2]);
5614     glVertex2f(-1.0f, 1.0f);
5615     glTexCoord3fv(info.coords[3]);
5616     glVertex2f(1.0f, 1.0f);
5617     glEnd();
5618
5619     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5620     glBindTexture(info.bind_target, old_binding);
5621
5622     glPopAttrib();
5623
5624     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5625 }
5626
5627 void surface_modify_ds_location(struct wined3d_surface *surface,
5628         DWORD location, UINT w, UINT h)
5629 {
5630     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5631
5632     if (location & ~(SFLAG_LOCATIONS | SFLAG_DISCARDED))
5633         FIXME("Invalid location (%#x) specified.\n", location);
5634
5635     if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5636             || (!(surface->flags & SFLAG_INTEXTURE) && (location & SFLAG_INTEXTURE)))
5637     {
5638         if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5639         {
5640             TRACE("Passing to container.\n");
5641             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5642         }
5643     }
5644
5645     surface->ds_current_size.cx = w;
5646     surface->ds_current_size.cy = h;
5647     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_DISCARDED);
5648     surface->flags |= location;
5649 }
5650
5651 /* Context activation is done by the caller. */
5652 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5653 {
5654     struct wined3d_device *device = surface->resource.device;
5655     GLsizei w, h;
5656
5657     TRACE("surface %p, new location %#x.\n", surface, location);
5658
5659     /* TODO: Make this work for modes other than FBO */
5660     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5661
5662     if (!(surface->flags & location))
5663     {
5664         w = surface->ds_current_size.cx;
5665         h = surface->ds_current_size.cy;
5666         surface->ds_current_size.cx = 0;
5667         surface->ds_current_size.cy = 0;
5668     }
5669     else
5670     {
5671         w = surface->resource.width;
5672         h = surface->resource.height;
5673     }
5674
5675     if (surface->ds_current_size.cx == surface->resource.width
5676             && surface->ds_current_size.cy == surface->resource.height)
5677     {
5678         TRACE("Location (%#x) is already up to date.\n", location);
5679         return;
5680     }
5681
5682     if (surface->current_renderbuffer)
5683     {
5684         FIXME("Not supported with fixed up depth stencil.\n");
5685         return;
5686     }
5687
5688     if (surface->flags & SFLAG_DISCARDED)
5689     {
5690         TRACE("Surface was discarded, no need copy data.\n");
5691         switch (location)
5692         {
5693             case SFLAG_INTEXTURE:
5694                 surface_prepare_texture(surface, context, FALSE);
5695                 break;
5696             case SFLAG_INRB_MULTISAMPLE:
5697                 surface_prepare_rb(surface, context->gl_info, TRUE);
5698                 break;
5699             case SFLAG_INDRAWABLE:
5700                 /* Nothing to do */
5701                 break;
5702             default:
5703                 FIXME("Unhandled location %#x\n", location);
5704         }
5705         surface->flags &= ~SFLAG_DISCARDED;
5706         surface->flags |= location;
5707         surface->ds_current_size.cx = surface->resource.width;
5708         surface->ds_current_size.cy = surface->resource.height;
5709         return;
5710     }
5711
5712     if (!(surface->flags & SFLAG_LOCATIONS))
5713     {
5714         FIXME("No up to date depth stencil location.\n");
5715         surface->flags |= location;
5716         surface->ds_current_size.cx = surface->resource.width;
5717         surface->ds_current_size.cy = surface->resource.height;
5718         return;
5719     }
5720
5721     if (location == SFLAG_INTEXTURE)
5722     {
5723         GLint old_binding = 0;
5724         GLenum bind_target;
5725
5726         /* The render target is allowed to be smaller than the depth/stencil
5727          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5728          * than the offscreen surface. Don't overwrite the offscreen surface
5729          * with undefined data. */
5730         w = min(w, context->swapchain->desc.backbuffer_width);
5731         h = min(h, context->swapchain->desc.backbuffer_height);
5732
5733         TRACE("Copying onscreen depth buffer to depth texture.\n");
5734
5735         ENTER_GL();
5736
5737         if (!device->depth_blt_texture)
5738         {
5739             glGenTextures(1, &device->depth_blt_texture);
5740         }
5741
5742         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5743          * directly on the FBO texture. That's because we need to flip. */
5744         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5745                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5746         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5747         {
5748             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5749             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5750         }
5751         else
5752         {
5753             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5754             bind_target = GL_TEXTURE_2D;
5755         }
5756         glBindTexture(bind_target, device->depth_blt_texture);
5757         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5758          * internal format, because the internal format might include stencil
5759          * data. In principle we should copy stencil data as well, but unless
5760          * the driver supports stencil export it's hard to do, and doesn't
5761          * seem to be needed in practice. If the hardware doesn't support
5762          * writing stencil data, the glCopyTexImage2D() call might trigger
5763          * software fallbacks. */
5764         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5765         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5766         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5767         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5768         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5769         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5770         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5771         glBindTexture(bind_target, old_binding);
5772
5773         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5774                 NULL, surface, SFLAG_INTEXTURE);
5775         context_set_draw_buffer(context, GL_NONE);
5776         glReadBuffer(GL_NONE);
5777
5778         /* Do the actual blit */
5779         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5780         checkGLcall("depth_blt");
5781
5782         context_invalidate_state(context, STATE_FRAMEBUFFER);
5783
5784         LEAVE_GL();
5785
5786         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5787     }
5788     else if (location == SFLAG_INDRAWABLE)
5789     {
5790         TRACE("Copying depth texture to onscreen depth buffer.\n");
5791
5792         ENTER_GL();
5793
5794         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5795                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5796         surface_depth_blt(surface, context, surface->texture_name,
5797                 0, surface->pow2Height - h, w, h, surface->texture_target);
5798         checkGLcall("depth_blt");
5799
5800         context_invalidate_state(context, STATE_FRAMEBUFFER);
5801
5802         LEAVE_GL();
5803
5804         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5805     }
5806     else
5807     {
5808         ERR("Invalid location (%#x) specified.\n", location);
5809     }
5810
5811     surface->flags |= location;
5812     surface->ds_current_size.cx = surface->resource.width;
5813     surface->ds_current_size.cy = surface->resource.height;
5814 }
5815
5816 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5817 {
5818     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5819     struct wined3d_surface *overlay;
5820
5821     TRACE("surface %p, location %s, persistent %#x.\n",
5822             surface, debug_surflocation(location), persistent);
5823
5824     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5825             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5826             && (location & SFLAG_INDRAWABLE))
5827         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5828
5829     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5830             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5831         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5832
5833     if (persistent)
5834     {
5835         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5836                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5837         {
5838             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5839             {
5840                 TRACE("Passing to container.\n");
5841                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5842             }
5843         }
5844         surface->flags &= ~SFLAG_LOCATIONS;
5845         surface->flags |= location;
5846
5847         /* Redraw emulated overlays, if any */
5848         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5849         {
5850             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5851             {
5852                 surface_draw_overlay(overlay);
5853             }
5854         }
5855     }
5856     else
5857     {
5858         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5859         {
5860             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5861             {
5862                 TRACE("Passing to container\n");
5863                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5864             }
5865         }
5866         surface->flags &= ~location;
5867     }
5868
5869     if (!(surface->flags & SFLAG_LOCATIONS))
5870     {
5871         ERR("Surface %p does not have any up to date location.\n", surface);
5872     }
5873 }
5874
5875 static DWORD resource_access_from_location(DWORD location)
5876 {
5877     switch (location)
5878     {
5879         case SFLAG_INSYSMEM:
5880             return WINED3D_RESOURCE_ACCESS_CPU;
5881
5882         case SFLAG_INDRAWABLE:
5883         case SFLAG_INSRGBTEX:
5884         case SFLAG_INTEXTURE:
5885         case SFLAG_INRB_MULTISAMPLE:
5886         case SFLAG_INRB_RESOLVED:
5887             return WINED3D_RESOURCE_ACCESS_GPU;
5888
5889         default:
5890             FIXME("Unhandled location %#x.\n", location);
5891             return 0;
5892     }
5893 }
5894
5895 static void surface_load_sysmem(struct wined3d_surface *surface,
5896         const struct wined3d_gl_info *gl_info, const RECT *rect)
5897 {
5898     surface_prepare_system_memory(surface);
5899
5900     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5901         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5902
5903     /* Download the surface to system memory. */
5904     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5905     {
5906         struct wined3d_device *device = surface->resource.device;
5907         struct wined3d_context *context;
5908
5909         /* TODO: Use already acquired context when possible. */
5910         context = context_acquire(device, NULL);
5911
5912         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5913         surface_download_data(surface, gl_info);
5914
5915         context_release(context);
5916
5917         return;
5918     }
5919
5920     if (surface->flags & SFLAG_INDRAWABLE)
5921     {
5922         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5923                 wined3d_surface_get_pitch(surface));
5924         return;
5925     }
5926
5927     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5928             surface, surface->flags & SFLAG_LOCATIONS);
5929 }
5930
5931 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5932         const struct wined3d_gl_info *gl_info, const RECT *rect)
5933 {
5934     struct wined3d_device *device = surface->resource.device;
5935     enum wined3d_conversion_type convert;
5936     struct wined3d_format format;
5937     UINT byte_count;
5938     BYTE *mem;
5939
5940     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5941     {
5942         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5943         return WINED3DERR_INVALIDCALL;
5944     }
5945
5946     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5947         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5948
5949     if (surface->flags & SFLAG_INTEXTURE)
5950     {
5951         RECT r;
5952
5953         surface_get_rect(surface, rect, &r);
5954         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5955
5956         return WINED3D_OK;
5957     }
5958
5959     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5960     {
5961         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5962          * path through sysmem. */
5963         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5964     }
5965
5966     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5967
5968     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5969      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5970      * called. */
5971     if ((convert != WINED3D_CT_NONE) && (surface->flags & SFLAG_PBO))
5972     {
5973         struct wined3d_context *context;
5974
5975         TRACE("Removing the pbo attached to surface %p.\n", surface);
5976
5977         /* TODO: Use already acquired context when possible. */
5978         context = context_acquire(device, NULL);
5979
5980         surface_remove_pbo(surface, gl_info);
5981
5982         context_release(context);
5983     }
5984
5985     if ((convert != WINED3D_CT_NONE) && surface->resource.allocatedMemory)
5986     {
5987         UINT height = surface->resource.height;
5988         UINT width = surface->resource.width;
5989         UINT src_pitch, dst_pitch;
5990
5991         byte_count = format.conv_byte_count;
5992         src_pitch = wined3d_surface_get_pitch(surface);
5993
5994         /* Stick to the alignment for the converted surface too, makes it
5995          * easier to load the surface. */
5996         dst_pitch = width * byte_count;
5997         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5998
5999         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6000         {
6001             ERR("Out of memory (%u).\n", dst_pitch * height);
6002             return E_OUTOFMEMORY;
6003         }
6004
6005         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
6006                 src_pitch, width, height, dst_pitch, convert, surface);
6007
6008         surface->flags |= SFLAG_CONVERTED;
6009     }
6010     else
6011     {
6012         surface->flags &= ~SFLAG_CONVERTED;
6013         mem = surface->resource.allocatedMemory;
6014         byte_count = format.byte_count;
6015     }
6016
6017     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
6018
6019     /* Don't delete PBO memory. */
6020     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6021         HeapFree(GetProcessHeap(), 0, mem);
6022
6023     return WINED3D_OK;
6024 }
6025
6026 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6027         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6028 {
6029     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6030     struct wined3d_device *device = surface->resource.device;
6031     enum wined3d_conversion_type convert;
6032     struct wined3d_context *context;
6033     UINT width, src_pitch, dst_pitch;
6034     struct wined3d_bo_address data;
6035     struct wined3d_format format;
6036     POINT dst_point = {0, 0};
6037     BYTE *mem;
6038
6039     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6040             && surface_is_offscreen(surface)
6041             && (surface->flags & SFLAG_INDRAWABLE))
6042     {
6043         surface_load_fb_texture(surface, srgb);
6044
6045         return WINED3D_OK;
6046     }
6047
6048     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6049             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6050             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6051                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6052                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6053     {
6054         if (srgb)
6055             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6056                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6057         else
6058             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6059                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6060
6061         return WINED3D_OK;
6062     }
6063
6064     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6065             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6066             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6067                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6068                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6069     {
6070         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6071         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
6072         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6073
6074         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6075                 &rect, surface, dst_location, &rect);
6076
6077         return WINED3D_OK;
6078     }
6079
6080     /* Upload from system memory */
6081
6082     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6083             TRUE /* We will use textures */, &format, &convert);
6084
6085     if (srgb)
6086     {
6087         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6088         {
6089             /* Performance warning... */
6090             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6091             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6092         }
6093     }
6094     else
6095     {
6096         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6097         {
6098             /* Performance warning... */
6099             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6100             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6101         }
6102     }
6103
6104     if (!(surface->flags & SFLAG_INSYSMEM))
6105     {
6106         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6107         /* Lets hope we get it from somewhere... */
6108         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6109     }
6110
6111     /* TODO: Use already acquired context when possible. */
6112     context = context_acquire(device, NULL);
6113
6114     surface_prepare_texture(surface, context, srgb);
6115     surface_bind_and_dirtify(surface, context, srgb);
6116
6117     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6118     {
6119         surface->flags |= SFLAG_GLCKEY;
6120         surface->gl_color_key = surface->src_blt_color_key;
6121     }
6122     else surface->flags &= ~SFLAG_GLCKEY;
6123
6124     width = surface->resource.width;
6125     src_pitch = wined3d_surface_get_pitch(surface);
6126
6127     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6128      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6129      * called. */
6130     if ((convert != WINED3D_CT_NONE || format.convert) && (surface->flags & SFLAG_PBO))
6131     {
6132         TRACE("Removing the pbo attached to surface %p.\n", surface);
6133         surface_remove_pbo(surface, gl_info);
6134     }
6135
6136     if (format.convert)
6137     {
6138         /* This code is entered for texture formats which need a fixup. */
6139         UINT height = surface->resource.height;
6140
6141         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6142         dst_pitch = width * format.conv_byte_count;
6143         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6144
6145         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6146         {
6147             ERR("Out of memory (%u).\n", dst_pitch * height);
6148             context_release(context);
6149             return E_OUTOFMEMORY;
6150         }
6151         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6152         format.byte_count = format.conv_byte_count;
6153         src_pitch = dst_pitch;
6154     }
6155     else if (convert != WINED3D_CT_NONE && surface->resource.allocatedMemory)
6156     {
6157         /* This code is only entered for color keying fixups */
6158         UINT height = surface->resource.height;
6159
6160         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6161         dst_pitch = width * format.conv_byte_count;
6162         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6163
6164         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6165         {
6166             ERR("Out of memory (%u).\n", dst_pitch * height);
6167             context_release(context);
6168             return E_OUTOFMEMORY;
6169         }
6170         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6171                 width, height, dst_pitch, convert, surface);
6172         format.byte_count = format.conv_byte_count;
6173         src_pitch = dst_pitch;
6174     }
6175     else
6176     {
6177         mem = surface->resource.allocatedMemory;
6178     }
6179
6180     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6181     data.addr = mem;
6182     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6183
6184     context_release(context);
6185
6186     /* Don't delete PBO memory. */
6187     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6188         HeapFree(GetProcessHeap(), 0, mem);
6189
6190     return WINED3D_OK;
6191 }
6192
6193 static void surface_multisample_resolve(struct wined3d_surface *surface)
6194 {
6195     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6196
6197     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6198         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6199
6200     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6201             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6202 }
6203
6204 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6205 {
6206     struct wined3d_device *device = surface->resource.device;
6207     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6208     HRESULT hr;
6209
6210     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6211
6212     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6213     {
6214         if (location == SFLAG_INTEXTURE)
6215         {
6216             struct wined3d_context *context = context_acquire(device, NULL);
6217             surface_load_ds_location(surface, context, location);
6218             context_release(context);
6219             return WINED3D_OK;
6220         }
6221         else
6222         {
6223             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6224             return WINED3DERR_INVALIDCALL;
6225         }
6226     }
6227
6228     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6229         location = SFLAG_INTEXTURE;
6230
6231     if (surface->flags & location)
6232     {
6233         TRACE("Location already up to date.\n");
6234
6235         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6236                 && surface_need_pbo(surface, gl_info))
6237             surface_load_pbo(surface, gl_info);
6238
6239         return WINED3D_OK;
6240     }
6241
6242     if (WARN_ON(d3d_surface))
6243     {
6244         DWORD required_access = resource_access_from_location(location);
6245         if ((surface->resource.access_flags & required_access) != required_access)
6246             WARN("Operation requires %#x access, but surface only has %#x.\n",
6247                     required_access, surface->resource.access_flags);
6248     }
6249
6250     if (!(surface->flags & SFLAG_LOCATIONS))
6251     {
6252         ERR("Surface %p does not have any up to date location.\n", surface);
6253         surface->flags |= SFLAG_LOST;
6254         return WINED3DERR_DEVICELOST;
6255     }
6256
6257     switch (location)
6258     {
6259         case SFLAG_INSYSMEM:
6260             surface_load_sysmem(surface, gl_info, rect);
6261             break;
6262
6263         case SFLAG_INDRAWABLE:
6264             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6265                 return hr;
6266             break;
6267
6268         case SFLAG_INRB_RESOLVED:
6269             surface_multisample_resolve(surface);
6270             break;
6271
6272         case SFLAG_INTEXTURE:
6273         case SFLAG_INSRGBTEX:
6274             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6275                 return hr;
6276             break;
6277
6278         default:
6279             ERR("Don't know how to handle location %#x.\n", location);
6280             break;
6281     }
6282
6283     if (!rect)
6284     {
6285         surface->flags |= location;
6286
6287         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6288             surface_evict_sysmem(surface);
6289     }
6290
6291     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6292             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6293     {
6294         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6295     }
6296
6297     return WINED3D_OK;
6298 }
6299
6300 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6301 {
6302     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6303
6304     /* Not on a swapchain - must be offscreen */
6305     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6306
6307     /* The front buffer is always onscreen */
6308     if (surface == swapchain->front_buffer) return FALSE;
6309
6310     /* If the swapchain is rendered to an FBO, the backbuffer is
6311      * offscreen, otherwise onscreen */
6312     return swapchain->render_to_fbo;
6313 }
6314
6315 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6318
6319 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6320 /* Context activation is done by the caller. */
6321 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6322 {
6323     BYTE table[256][4];
6324     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6325
6326     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6327
6328     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6329     ENTER_GL();
6330     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6331     LEAVE_GL();
6332 }
6333
6334 /* Context activation is done by the caller. */
6335 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6336 {
6337     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6338
6339     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6340      * else the surface is converted in software at upload time in LoadLocation.
6341      */
6342     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6343             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6344         ffp_blit_p8_upload_palette(surface, context->gl_info);
6345
6346     ENTER_GL();
6347     glEnable(surface->texture_target);
6348     checkGLcall("glEnable(surface->texture_target)");
6349     LEAVE_GL();
6350     return WINED3D_OK;
6351 }
6352
6353 /* Context activation is done by the caller. */
6354 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6355 {
6356     ENTER_GL();
6357     glDisable(GL_TEXTURE_2D);
6358     checkGLcall("glDisable(GL_TEXTURE_2D)");
6359     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6360     {
6361         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6362         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6363     }
6364     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6365     {
6366         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6367         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6368     }
6369     LEAVE_GL();
6370 }
6371
6372 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6373         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6374         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6375 {
6376     enum complex_fixup src_fixup;
6377
6378     switch (blit_op)
6379     {
6380         case WINED3D_BLIT_OP_COLOR_BLIT:
6381             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6382                 return FALSE;
6383
6384             src_fixup = get_complex_fixup(src_format->color_fixup);
6385             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6386             {
6387                 TRACE("Checking support for fixup:\n");
6388                 dump_color_fixup_desc(src_format->color_fixup);
6389             }
6390
6391             if (!is_identity_fixup(dst_format->color_fixup))
6392             {
6393                 TRACE("Destination fixups are not supported\n");
6394                 return FALSE;
6395             }
6396
6397             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6398             {
6399                 TRACE("P8 fixup supported\n");
6400                 return TRUE;
6401             }
6402
6403             /* We only support identity conversions. */
6404             if (is_identity_fixup(src_format->color_fixup))
6405             {
6406                 TRACE("[OK]\n");
6407                 return TRUE;
6408             }
6409
6410             TRACE("[FAILED]\n");
6411             return FALSE;
6412
6413         case WINED3D_BLIT_OP_COLOR_FILL:
6414             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6415                 return FALSE;
6416
6417             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6418             {
6419                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6420                     return FALSE;
6421             }
6422             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6423             {
6424                 TRACE("Color fill not supported\n");
6425                 return FALSE;
6426             }
6427
6428             /* FIXME: We should reject color fills on formats with fixups,
6429              * but this would break P8 color fills for example. */
6430
6431             return TRUE;
6432
6433         case WINED3D_BLIT_OP_DEPTH_FILL:
6434             return TRUE;
6435
6436         default:
6437             TRACE("Unsupported blit_op=%d\n", blit_op);
6438             return FALSE;
6439     }
6440 }
6441
6442 /* Do not call while under the GL lock. */
6443 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6444         const RECT *dst_rect, const struct wined3d_color *color)
6445 {
6446     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6447     struct wined3d_fb_state fb = {&dst_surface, NULL};
6448
6449     return device_clear_render_targets(device, 1, &fb,
6450             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6451 }
6452
6453 /* Do not call while under the GL lock. */
6454 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6455         struct wined3d_surface *surface, const RECT *rect, float depth)
6456 {
6457     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6458     struct wined3d_fb_state fb = {NULL, surface};
6459
6460     return device_clear_render_targets(device, 0, &fb,
6461             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6462 }
6463
6464 const struct blit_shader ffp_blit =  {
6465     ffp_blit_alloc,
6466     ffp_blit_free,
6467     ffp_blit_set,
6468     ffp_blit_unset,
6469     ffp_blit_supported,
6470     ffp_blit_color_fill,
6471     ffp_blit_depth_fill,
6472 };
6473
6474 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6475 {
6476     return WINED3D_OK;
6477 }
6478
/* Counterpart of cpu_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6483
6484 /* Context activation is done by the caller. */
6485 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6486 {
6487     return WINED3D_OK;
6488 }
6489
/* Counterpart of cpu_blit_set(); there is no state to restore.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6494
6495 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6496         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6497         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6498 {
6499     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6500     {
6501         return TRUE;
6502     }
6503
6504     return FALSE;
6505 }
6506
6507 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6508         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6509         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6510 {
6511     UINT row_block_count;
6512     const BYTE *src_row;
6513     BYTE *dst_row;
6514     UINT x, y;
6515
6516     src_row = src_data;
6517     dst_row = dst_data;
6518
6519     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6520
6521     if (!flags)
6522     {
6523         for (y = 0; y < update_h; y += format->block_height)
6524         {
6525             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6526             src_row += src_pitch;
6527             dst_row += dst_pitch;
6528         }
6529
6530         return WINED3D_OK;
6531     }
6532
6533     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6534     {
6535         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6536
6537         switch (format->id)
6538         {
6539             case WINED3DFMT_DXT1:
6540                 for (y = 0; y < update_h; y += format->block_height)
6541                 {
6542                     struct block
6543                     {
6544                         WORD color[2];
6545                         BYTE control_row[4];
6546                     };
6547
6548                     const struct block *s = (const struct block *)src_row;
6549                     struct block *d = (struct block *)dst_row;
6550
6551                     for (x = 0; x < row_block_count; ++x)
6552                     {
6553                         d[x].color[0] = s[x].color[0];
6554                         d[x].color[1] = s[x].color[1];
6555                         d[x].control_row[0] = s[x].control_row[3];
6556                         d[x].control_row[1] = s[x].control_row[2];
6557                         d[x].control_row[2] = s[x].control_row[1];
6558                         d[x].control_row[3] = s[x].control_row[0];
6559                     }
6560                     src_row -= src_pitch;
6561                     dst_row += dst_pitch;
6562                 }
6563                 return WINED3D_OK;
6564
6565             case WINED3DFMT_DXT3:
6566                 for (y = 0; y < update_h; y += format->block_height)
6567                 {
6568                     struct block
6569                     {
6570                         WORD alpha_row[4];
6571                         WORD color[2];
6572                         BYTE control_row[4];
6573                     };
6574
6575                     const struct block *s = (const struct block *)src_row;
6576                     struct block *d = (struct block *)dst_row;
6577
6578                     for (x = 0; x < row_block_count; ++x)
6579                     {
6580                         d[x].alpha_row[0] = s[x].alpha_row[3];
6581                         d[x].alpha_row[1] = s[x].alpha_row[2];
6582                         d[x].alpha_row[2] = s[x].alpha_row[1];
6583                         d[x].alpha_row[3] = s[x].alpha_row[0];
6584                         d[x].color[0] = s[x].color[0];
6585                         d[x].color[1] = s[x].color[1];
6586                         d[x].control_row[0] = s[x].control_row[3];
6587                         d[x].control_row[1] = s[x].control_row[2];
6588                         d[x].control_row[2] = s[x].control_row[1];
6589                         d[x].control_row[3] = s[x].control_row[0];
6590                     }
6591                     src_row -= src_pitch;
6592                     dst_row += dst_pitch;
6593                 }
6594                 return WINED3D_OK;
6595
6596             default:
6597                 FIXME("Compressed flip not implemented for format %s.\n",
6598                         debug_d3dformat(format->id));
6599                 return E_NOTIMPL;
6600         }
6601     }
6602
6603     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6604             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6605
6606     return E_NOTIMPL;
6607 }
6608
6609 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6610         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6611         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6612 {
6613     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6614     const struct wined3d_format *src_format, *dst_format;
6615     struct wined3d_surface *orig_src = src_surface;
6616     struct wined3d_mapped_rect dst_map, src_map;
6617     const BYTE *sbase = NULL;
6618     HRESULT hr = WINED3D_OK;
6619     const BYTE *sbuf;
6620     RECT xdst,xsrc;
6621     BYTE *dbuf;
6622     int x, y;
6623
6624     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6625             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6626             flags, fx, debug_d3dtexturefiltertype(filter));
6627
6628     xsrc = *src_rect;
6629
6630     if (!src_surface)
6631     {
6632         RECT full_rect;
6633
6634         full_rect.left = 0;
6635         full_rect.top = 0;
6636         full_rect.right = dst_surface->resource.width;
6637         full_rect.bottom = dst_surface->resource.height;
6638         IntersectRect(&xdst, &full_rect, dst_rect);
6639     }
6640     else
6641     {
6642         BOOL clip_horiz, clip_vert;
6643
6644         xdst = *dst_rect;
6645         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6646         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6647
6648         if (clip_vert || clip_horiz)
6649         {
6650             /* Now check if this is a special case or not... */
6651             if ((flags & WINEDDBLT_DDFX)
6652                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6653                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6654             {
6655                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6656                 return WINED3D_OK;
6657             }
6658
6659             if (clip_horiz)
6660             {
6661                 if (xdst.left < 0)
6662                 {
6663                     xsrc.left -= xdst.left;
6664                     xdst.left = 0;
6665                 }
6666                 if (xdst.right > dst_surface->resource.width)
6667                 {
6668                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6669                     xdst.right = (int)dst_surface->resource.width;
6670                 }
6671             }
6672
6673             if (clip_vert)
6674             {
6675                 if (xdst.top < 0)
6676                 {
6677                     xsrc.top -= xdst.top;
6678                     xdst.top = 0;
6679                 }
6680                 if (xdst.bottom > dst_surface->resource.height)
6681                 {
6682                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6683                     xdst.bottom = (int)dst_surface->resource.height;
6684                 }
6685             }
6686
6687             /* And check if after clipping something is still to be done... */
6688             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6689                     || (xdst.left >= (int)dst_surface->resource.width)
6690                     || (xdst.top >= (int)dst_surface->resource.height)
6691                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6692                     || (xsrc.left >= (int)src_surface->resource.width)
6693                     || (xsrc.top >= (int)src_surface->resource.height))
6694             {
6695                 TRACE("Nothing to be done after clipping.\n");
6696                 return WINED3D_OK;
6697             }
6698         }
6699     }
6700
6701     if (src_surface == dst_surface)
6702     {
6703         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6704         src_map = dst_map;
6705         src_format = dst_surface->resource.format;
6706         dst_format = src_format;
6707     }
6708     else
6709     {
6710         dst_format = dst_surface->resource.format;
6711         if (src_surface)
6712         {
6713             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6714             {
6715                 src_surface = surface_convert_format(src_surface, dst_format->id);
6716                 if (!src_surface)
6717                 {
6718                     /* The conv function writes a FIXME */
6719                     WARN("Cannot convert source surface format to dest format.\n");
6720                     goto release;
6721                 }
6722             }
6723             wined3d_surface_map(src_surface, &src_map, NULL, WINED3DLOCK_READONLY);
6724             src_format = src_surface->resource.format;
6725         }
6726         else
6727         {
6728             src_format = dst_format;
6729         }
6730         if (dst_rect)
6731             wined3d_surface_map(dst_surface, &dst_map, &xdst, 0);
6732         else
6733             wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6734     }
6735
6736     bpp = dst_surface->resource.format->byte_count;
6737     srcheight = xsrc.bottom - xsrc.top;
6738     srcwidth = xsrc.right - xsrc.left;
6739     dstheight = xdst.bottom - xdst.top;
6740     dstwidth = xdst.right - xdst.left;
6741     width = (xdst.right - xdst.left) * bpp;
6742
6743     if (src_surface)
6744         sbase = (BYTE *)src_map.data
6745                 + ((xsrc.top / src_format->block_height) * src_map.row_pitch)
6746                 + ((xsrc.left / src_format->block_width) * src_format->block_byte_count);
6747     if (dst_rect && src_surface != dst_surface)
6748         dbuf = dst_map.data;
6749     else
6750         dbuf = (BYTE *)dst_map.data
6751                 + ((xdst.top / dst_format->block_height) * dst_map.row_pitch)
6752                 + ((xdst.left / dst_format->block_width) * dst_format->block_byte_count);
6753
6754     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6755     {
6756         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6757
6758         if (src_surface == dst_surface)
6759         {
6760             FIXME("Only plain blits supported on compressed surfaces.\n");
6761             hr = E_NOTIMPL;
6762             goto release;
6763         }
6764
6765         if (srcheight != dstheight || srcwidth != dstwidth)
6766         {
6767             WARN("Stretching not supported on compressed surfaces.\n");
6768             hr = WINED3DERR_INVALIDCALL;
6769             goto release;
6770         }
6771
6772         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6773         {
6774             WARN("Rectangle not block-aligned.\n");
6775             hr = WINED3DERR_INVALIDCALL;
6776             goto release;
6777         }
6778
6779         hr = surface_cpu_blt_compressed(sbase, dbuf,
6780                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6781                 src_format, flags, fx);
6782         goto release;
6783     }
6784
6785     /* First, all the 'source-less' blits */
6786     if (flags & WINEDDBLT_COLORFILL)
6787     {
6788         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6789         flags &= ~WINEDDBLT_COLORFILL;
6790     }
6791
6792     if (flags & WINEDDBLT_DEPTHFILL)
6793     {
6794         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6795     }
6796     if (flags & WINEDDBLT_ROP)
6797     {
6798         /* Catch some degenerate cases here. */
6799         switch (fx->dwROP)
6800         {
6801             case BLACKNESS:
6802                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6803                 break;
6804             case 0xAA0029: /* No-op */
6805                 break;
6806             case WHITENESS:
6807                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6808                 break;
6809             case SRCCOPY: /* Well, we do that below? */
6810                 break;
6811             default:
6812                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6813                 goto error;
6814         }
6815         flags &= ~WINEDDBLT_ROP;
6816     }
6817     if (flags & WINEDDBLT_DDROPS)
6818     {
6819         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6820     }
6821     /* Now the 'with source' blits. */
6822     if (src_surface)
6823     {
6824         int sx, xinc, sy, yinc;
6825
6826         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6827             goto release;
6828
6829         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6830                 && (srcwidth != dstwidth || srcheight != dstheight))
6831         {
6832             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6833             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6834         }
6835
6836         xinc = (srcwidth << 16) / dstwidth;
6837         yinc = (srcheight << 16) / dstheight;
6838
6839         if (!flags)
6840         {
6841             /* No effects, we can cheat here. */
6842             if (dstwidth == srcwidth)
6843             {
6844                 if (dstheight == srcheight)
6845                 {
6846                     /* No stretching in either direction. This needs to be as
6847                      * fast as possible. */
6848                     sbuf = sbase;
6849
6850                     /* Check for overlapping surfaces. */
6851                     if (src_surface != dst_surface || xdst.top < xsrc.top
6852                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6853                     {
6854                         /* No overlap, or dst above src, so copy from top downwards. */
6855                         for (y = 0; y < dstheight; ++y)
6856                         {
6857                             memcpy(dbuf, sbuf, width);
6858                             sbuf += src_map.row_pitch;
6859                             dbuf += dst_map.row_pitch;
6860                         }
6861                     }
6862                     else if (xdst.top > xsrc.top)
6863                     {
6864                         /* Copy from bottom upwards. */
6865                         sbuf += src_map.row_pitch * dstheight;
6866                         dbuf += dst_map.row_pitch * dstheight;
6867                         for (y = 0; y < dstheight; ++y)
6868                         {
6869                             sbuf -= src_map.row_pitch;
6870                             dbuf -= dst_map.row_pitch;
6871                             memcpy(dbuf, sbuf, width);
6872                         }
6873                     }
6874                     else
6875                     {
6876                         /* Src and dst overlapping on the same line, use memmove. */
6877                         for (y = 0; y < dstheight; ++y)
6878                         {
6879                             memmove(dbuf, sbuf, width);
6880                             sbuf += src_map.row_pitch;
6881                             dbuf += dst_map.row_pitch;
6882                         }
6883                     }
6884                 }
6885                 else
6886                 {
6887                     /* Stretching in y direction only. */
6888                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6889                     {
6890                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6891                         memcpy(dbuf, sbuf, width);
6892                         dbuf += dst_map.row_pitch;
6893                     }
6894                 }
6895             }
6896             else
6897             {
6898                 /* Stretching in X direction. */
6899                 int last_sy = -1;
6900                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6901                 {
6902                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6903
6904                     if ((sy >> 16) == (last_sy >> 16))
6905                     {
6906                         /* This source row is the same as last source row -
6907                          * Copy the already stretched row. */
6908                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6909                     }
6910                     else
6911                     {
6912 #define STRETCH_ROW(type) \
6913 do { \
6914     const type *s = (const type *)sbuf; \
6915     type *d = (type *)dbuf; \
6916     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6917         d[x] = s[sx >> 16]; \
6918 } while(0)
6919
6920                         switch(bpp)
6921                         {
6922                             case 1:
6923                                 STRETCH_ROW(BYTE);
6924                                 break;
6925                             case 2:
6926                                 STRETCH_ROW(WORD);
6927                                 break;
6928                             case 4:
6929                                 STRETCH_ROW(DWORD);
6930                                 break;
6931                             case 3:
6932                             {
6933                                 const BYTE *s;
6934                                 BYTE *d = dbuf;
6935                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6936                                 {
6937                                     DWORD pixel;
6938
6939                                     s = sbuf + 3 * (sx >> 16);
6940                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6941                                     d[0] = (pixel      ) & 0xff;
6942                                     d[1] = (pixel >>  8) & 0xff;
6943                                     d[2] = (pixel >> 16) & 0xff;
6944                                     d += 3;
6945                                 }
6946                                 break;
6947                             }
6948                             default:
6949                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6950                                 hr = WINED3DERR_NOTAVAILABLE;
6951                                 goto error;
6952                         }
6953 #undef STRETCH_ROW
6954                     }
6955                     dbuf += dst_map.row_pitch;
6956                     last_sy = sy;
6957                 }
6958             }
6959         }
6960         else
6961         {
6962             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6963             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6964             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6965             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6966             {
6967                 /* The color keying flags are checked for correctness in ddraw */
6968                 if (flags & WINEDDBLT_KEYSRC)
6969                 {
6970                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6971                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6972                 }
6973                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6974                 {
6975                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6976                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6977                 }
6978
6979                 if (flags & WINEDDBLT_KEYDEST)
6980                 {
6981                     /* Destination color keys are taken from the source surface! */
6982                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6983                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6984                 }
6985                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6986                 {
6987                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6988                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6989                 }
6990
6991                 if (bpp == 1)
6992                 {
6993                     keymask = 0xff;
6994                 }
6995                 else
6996                 {
6997                     keymask = src_format->red_mask
6998                             | src_format->green_mask
6999                             | src_format->blue_mask;
7000                 }
7001                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
7002             }
7003
7004             if (flags & WINEDDBLT_DDFX)
7005             {
7006                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
7007                 LONG tmpxy;
7008                 dTopLeft     = dbuf;
7009                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
7010                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
7011                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
7012
7013                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
7014                 {
7015                     /* I don't think we need to do anything about this flag */
7016                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
7017                 }
7018                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
7019                 {
7020                     tmp          = dTopRight;
7021                     dTopRight    = dTopLeft;
7022                     dTopLeft     = tmp;
7023                     tmp          = dBottomRight;
7024                     dBottomRight = dBottomLeft;
7025                     dBottomLeft  = tmp;
7026                     dstxinc = dstxinc * -1;
7027                 }
7028                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
7029                 {
7030                     tmp          = dTopLeft;
7031                     dTopLeft     = dBottomLeft;
7032                     dBottomLeft  = tmp;
7033                     tmp          = dTopRight;
7034                     dTopRight    = dBottomRight;
7035                     dBottomRight = tmp;
7036                     dstyinc = dstyinc * -1;
7037                 }
7038                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
7039                 {
7040                     /* I don't think we need to do anything about this flag */
7041                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
7042                 }
7043                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
7044                 {
7045                     tmp          = dBottomRight;
7046                     dBottomRight = dTopLeft;
7047                     dTopLeft     = tmp;
7048                     tmp          = dBottomLeft;
7049                     dBottomLeft  = dTopRight;
7050                     dTopRight    = tmp;
7051                     dstxinc = dstxinc * -1;
7052                     dstyinc = dstyinc * -1;
7053                 }
7054                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
7055                 {
7056                     tmp          = dTopLeft;
7057                     dTopLeft     = dBottomLeft;
7058                     dBottomLeft  = dBottomRight;
7059                     dBottomRight = dTopRight;
7060                     dTopRight    = tmp;
7061                     tmpxy   = dstxinc;
7062                     dstxinc = dstyinc;
7063                     dstyinc = tmpxy;
7064                     dstxinc = dstxinc * -1;
7065                 }
7066                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
7067                 {
7068                     tmp          = dTopLeft;
7069                     dTopLeft     = dTopRight;
7070                     dTopRight    = dBottomRight;
7071                     dBottomRight = dBottomLeft;
7072                     dBottomLeft  = tmp;
7073                     tmpxy   = dstxinc;
7074                     dstxinc = dstyinc;
7075                     dstyinc = tmpxy;
7076                     dstyinc = dstyinc * -1;
7077                 }
7078                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
7079                 {
7080                     /* I don't think we need to do anything about this flag */
7081                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
7082                 }
7083                 dbuf = dTopLeft;
7084                 flags &= ~(WINEDDBLT_DDFX);
7085             }
7086
7087 #define COPY_COLORKEY_FX(type) \
7088 do { \
7089     const type *s; \
7090     type *d = (type *)dbuf, *dx, tmp; \
7091     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7092     { \
7093         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7094         dx = d; \
7095         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7096         { \
7097             tmp = s[sx >> 16]; \
7098             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7099                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7100             { \
7101                 dx[0] = tmp; \
7102             } \
7103             dx = (type *)(((BYTE *)dx) + dstxinc); \
7104         } \
7105         d = (type *)(((BYTE *)d) + dstyinc); \
7106     } \
7107 } while(0)
7108
7109             switch (bpp)
7110             {
7111                 case 1:
7112                     COPY_COLORKEY_FX(BYTE);
7113                     break;
7114                 case 2:
7115                     COPY_COLORKEY_FX(WORD);
7116                     break;
7117                 case 4:
7118                     COPY_COLORKEY_FX(DWORD);
7119                     break;
7120                 case 3:
7121                 {
7122                     const BYTE *s;
7123                     BYTE *d = dbuf, *dx;
7124                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7125                     {
7126                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7127                         dx = d;
7128                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7129                         {
7130                             DWORD pixel, dpixel = 0;
7131                             s = sbuf + 3 * (sx>>16);
7132                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7133                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7134                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7135                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7136                             {
7137                                 dx[0] = (pixel      ) & 0xff;
7138                                 dx[1] = (pixel >>  8) & 0xff;
7139                                 dx[2] = (pixel >> 16) & 0xff;
7140                             }
7141                             dx += dstxinc;
7142                         }
7143                         d += dstyinc;
7144                     }
7145                     break;
7146                 }
7147                 default:
7148                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7149                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7150                     hr = WINED3DERR_NOTAVAILABLE;
7151                     goto error;
7152 #undef COPY_COLORKEY_FX
7153             }
7154         }
7155     }
7156
7157 error:
7158     if (flags && FIXME_ON(d3d_surface))
7159     {
7160         FIXME("\tUnsupported flags: %#x.\n", flags);
7161     }
7162
7163 release:
7164     wined3d_surface_unmap(dst_surface);
7165     if (src_surface && src_surface != dst_surface)
7166         wined3d_surface_unmap(src_surface);
7167     /* Release the converted surface, if any. */
7168     if (src_surface && src_surface != orig_src)
7169         wined3d_surface_decref(src_surface);
7170
7171     return hr;
7172 }
7173
7174 /* Do not call while under the GL lock. */
7175 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7176         const RECT *dst_rect, const struct wined3d_color *color)
7177 {
7178     static const RECT src_rect;
7179     WINEDDBLTFX BltFx;
7180
7181     memset(&BltFx, 0, sizeof(BltFx));
7182     BltFx.dwSize = sizeof(BltFx);
7183     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7184     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7185             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7186 }
7187
7188 /* Do not call while under the GL lock. */
7189 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7190         struct wined3d_surface *surface, const RECT *rect, float depth)
7191 {
7192     FIXME("Depth filling not implemented by cpu_blit.\n");
7193     return WINED3DERR_INVALIDCALL;
7194 }
7195
7196 const struct blit_shader cpu_blit =  {
7197     cpu_blit_alloc,
7198     cpu_blit_free,
7199     cpu_blit_set,
7200     cpu_blit_unset,
7201     cpu_blit_supported,
7202     cpu_blit_color_fill,
7203     cpu_blit_depth_fill,
7204 };
7205
7206 static HRESULT surface_init(struct wined3d_surface *surface, enum wined3d_surface_type surface_type, UINT alignment,
7207         UINT width, UINT height, UINT level, enum wined3d_multisample_type multisample_type,
7208         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7209         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7210 {
7211     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7212     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7213     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7214     unsigned int resource_size;
7215     HRESULT hr;
7216
7217     if (multisample_quality > 0)
7218     {
7219         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7220         multisample_quality = 0;
7221     }
7222
7223     /* Quick lockable sanity check.
7224      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7225      * this function is too deep to need to care about things like this.
7226      * Levels need to be checked too, since they all affect what can be done. */
7227     switch (pool)
7228     {
7229         case WINED3D_POOL_SCRATCH:
7230             if (!lockable)
7231             {
7232                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7233                         "which are mutually exclusive, setting lockable to TRUE.\n");
7234                 lockable = TRUE;
7235             }
7236             break;
7237
7238         case WINED3D_POOL_SYSTEM_MEM:
7239             if (!lockable)
7240                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7241             break;
7242
7243         case WINED3D_POOL_MANAGED:
7244             if (usage & WINED3DUSAGE_DYNAMIC)
7245                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7246             break;
7247
7248         case WINED3D_POOL_DEFAULT:
7249             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7250                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7251             break;
7252
7253         default:
7254             FIXME("Unknown pool %#x.\n", pool);
7255             break;
7256     };
7257
7258     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7259         FIXME("Trying to create a render target that isn't in the default pool.\n");
7260
7261     /* FIXME: Check that the format is supported by the device. */
7262
7263     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7264     if (!resource_size)
7265         return WINED3DERR_INVALIDCALL;
7266
7267     surface->surface_type = surface_type;
7268
7269     switch (surface_type)
7270     {
7271         case WINED3D_SURFACE_TYPE_OPENGL:
7272             surface->surface_ops = &surface_ops;
7273             break;
7274
7275         case WINED3D_SURFACE_TYPE_GDI:
7276             surface->surface_ops = &gdi_surface_ops;
7277             break;
7278
7279         default:
7280             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7281             return WINED3DERR_INVALIDCALL;
7282     }
7283
7284     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7285             multisample_type, multisample_quality, usage, pool, width, height, 1,
7286             resource_size, parent, parent_ops, &surface_resource_ops);
7287     if (FAILED(hr))
7288     {
7289         WARN("Failed to initialize resource, returning %#x.\n", hr);
7290         return hr;
7291     }
7292
7293     /* "Standalone" surface. */
7294     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7295
7296     surface->texture_level = level;
7297     list_init(&surface->overlays);
7298
7299     /* Flags */
7300     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7301     if (flags & WINED3D_SURFACE_DISCARD)
7302         surface->flags |= SFLAG_DISCARD;
7303     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7304         surface->flags |= SFLAG_PIN_SYSMEM;
7305     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7306         surface->flags |= SFLAG_LOCKABLE;
7307     /* I'm not sure if this qualifies as a hack or as an optimization. It
7308      * seems reasonable to assume that lockable render targets will get
7309      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7310      * creation. However, the other reason we want to do this is that several
7311      * ddraw applications access surface memory while the surface isn't
7312      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7313      * future locks prevents these from crashing. */
7314     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7315         surface->flags |= SFLAG_DYNLOCK;
7316
7317     /* Mark the texture as dirty so that it gets loaded first time around. */
7318     surface_add_dirty_rect(surface, NULL);
7319     list_init(&surface->renderbuffers);
7320
7321     TRACE("surface %p, memory %p, size %u\n",
7322             surface, surface->resource.allocatedMemory, surface->resource.size);
7323
7324     /* Call the private setup routine */
7325     hr = surface->surface_ops->surface_private_setup(surface);
7326     if (FAILED(hr))
7327     {
7328         ERR("Private setup failed, returning %#x\n", hr);
7329         surface_cleanup(surface);
7330         return hr;
7331     }
7332
7333     /* Similar to lockable rendertargets above, creating the DIB section
7334      * during surface initialization prevents the sysmem pointer from changing
7335      * after a wined3d_surface_getdc() call. */
7336     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7337             && SUCCEEDED(surface_create_dib_section(surface)))
7338     {
7339         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7340         surface->resource.heapMemory = NULL;
7341         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7342     }
7343
7344     return hr;
7345 }
7346
7347 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7348         enum wined3d_format_id format_id, UINT level, DWORD usage, enum wined3d_pool pool,
7349         enum wined3d_multisample_type multisample_type, DWORD multisample_quality,
7350         enum wined3d_surface_type surface_type, DWORD flags, void *parent,
7351         const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7352 {
7353     struct wined3d_surface *object;
7354     HRESULT hr;
7355
7356     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7357             device, width, height, debug_d3dformat(format_id), level);
7358     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7359             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7360     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7361
7362     if (surface_type == WINED3D_SURFACE_TYPE_OPENGL && !device->adapter)
7363     {
7364         ERR("OpenGL surfaces are not available without OpenGL.\n");
7365         return WINED3DERR_NOTAVAILABLE;
7366     }
7367
7368     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7369     if (!object)
7370     {
7371         ERR("Failed to allocate surface memory.\n");
7372         return WINED3DERR_OUTOFVIDEOMEMORY;
7373     }
7374
7375     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7376             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7377     if (FAILED(hr))
7378     {
7379         WARN("Failed to initialize surface, returning %#x.\n", hr);
7380         HeapFree(GetProcessHeap(), 0, object);
7381         return hr;
7382     }
7383
7384     TRACE("Created surface %p.\n", object);
7385     *surface = object;
7386
7387     return hr;
7388 }