ddraw: Explicitly check for the IDirectDrawColorControl interface in ddraw_surface7_Q...
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
/* Forward declarations of the two static blit helpers. Their definitions are
 * not part of this portion of the file; being static, they must be defined
 * elsewhere in this translation unit. */
static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
        const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
        enum wined3d_texture_filter_type filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
/* Texture binding and texture coordinate information needed to draw a
 * textured quad from a surface. Filled in by surface_get_blt_info(). */
struct blt_info
{
    /* GL_TEXTURE_BINDING_* enum that corresponds to bind_target. */
    GLenum binding;
    /* Texture target to enable and bind (2D, rectangle or cube map). */
    GLenum bind_target;
    /* Texture type (tex_2d, tex_rect or tex_cube). */
    enum tex_types tex_type;
    /* Four texture coordinates of three components each, one per quad
     * vertex. */
    GLfloat coords[4][3];
};
171
/* A rectangle with floating point edges: left, top, right and bottom.
 * cube_coords_float() fills one of these from an integer RECT. */
struct float_rect
{
    float l;
    float t;
    float r;
    float b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
/* Draw src_rect of the source surface's texture as a quad covering dst_rect.
 *
 * The texture coordinates are derived from the surface's texture target and
 * its pow2Width / pow2Height. "filter" selects the magnification and
 * minification filters; mip filtering is disabled for the draw.
 *
 * GL locking and context activation is done by the caller */
void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
        const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
{
    struct blt_info info;

    surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);

    glEnable(info.bind_target);
    checkGLcall("glEnable(bind_target)");

    context_bind_texture(context, info.bind_target, src_surface->texture_name);

    /* Filtering for StretchRect */
    glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
            wined3d_gl_mag_filter(magLookup, filter));
    checkGLcall("glTexParameteri");
    glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
            wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
    checkGLcall("glTexParameteri");
    /* Clamp in both directions. */
    glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
    glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
    /* Skip sRGB decoding when the extension allows controlling it. */
    if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
        glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
    /* Use the texel colour as-is. */
    glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
    checkGLcall("glTexEnvi");

    /* Draw a quad; the vertex order (left-top, right-top, left-bottom,
     * right-bottom) matches the order of info.coords. */
    glBegin(GL_TRIANGLE_STRIP);
    glTexCoord3fv(info.coords[0]);
    glVertex2i(dst_rect->left, dst_rect->top);

    glTexCoord3fv(info.coords[1]);
    glVertex2i(dst_rect->right, dst_rect->top);

    glTexCoord3fv(info.coords[2]);
    glVertex2i(dst_rect->left, dst_rect->bottom);

    glTexCoord3fv(info.coords[3]);
    glVertex2i(dst_rect->right, dst_rect->bottom);
    glEnd();

    /* Unbind the texture */
    context_bind_texture(context, info.bind_target, 0);

    /* We changed the filtering settings on the texture. Inform the
     * container about this to get the filters reset properly next draw.
     * The cached states are overwritten with fixed values regardless of the
     * filter that was actually applied above. */
    if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
    {
        struct wined3d_texture *texture = src_surface->container.u.texture;
        texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
        texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
        texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
        texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
    }
}
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380     UINT usage;
381     HDC dc;
382
383     TRACE("surface %p.\n", surface);
384
385     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
386     {
387         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
388         return WINED3DERR_INVALIDCALL;
389     }
390
391     switch (format->byte_count)
392     {
393         case 2:
394         case 4:
395             /* Allocate extra space to store the RGB bit masks. */
396             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
397             break;
398
399         case 3:
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
401             break;
402
403         default:
404             /* Allocate extra space for a palette. */
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
406                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
407             break;
408     }
409
410     if (!b_info)
411         return E_OUTOFMEMORY;
412
413     /* Some applications access the surface in via DWORDs, and do not take
414      * the necessary care at the end of the surface. So we need at least
415      * 4 extra bytes at the end of the surface. Check against the page size,
416      * if the last page used for the surface has at least 4 spare bytes we're
417      * safe, otherwise add an extra line to the DIB section. */
418     GetSystemInfo(&sysInfo);
419     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
420     {
421         extraline = 1;
422         TRACE("Adding an extra line to the DIB section.\n");
423     }
424
425     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
426     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
427     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
428     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
429     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
430             * wined3d_surface_get_pitch(surface);
431     b_info->bmiHeader.biPlanes = 1;
432     b_info->bmiHeader.biBitCount = format->byte_count * 8;
433
434     b_info->bmiHeader.biXPelsPerMeter = 0;
435     b_info->bmiHeader.biYPelsPerMeter = 0;
436     b_info->bmiHeader.biClrUsed = 0;
437     b_info->bmiHeader.biClrImportant = 0;
438
439     /* Get the bit masks */
440     masks = (DWORD *)b_info->bmiColors;
441     switch (surface->resource.format->id)
442     {
443         case WINED3DFMT_B8G8R8_UNORM:
444             usage = DIB_RGB_COLORS;
445             b_info->bmiHeader.biCompression = BI_RGB;
446             break;
447
448         case WINED3DFMT_B5G5R5X1_UNORM:
449         case WINED3DFMT_B5G5R5A1_UNORM:
450         case WINED3DFMT_B4G4R4A4_UNORM:
451         case WINED3DFMT_B4G4R4X4_UNORM:
452         case WINED3DFMT_B2G3R3_UNORM:
453         case WINED3DFMT_B2G3R3A8_UNORM:
454         case WINED3DFMT_R10G10B10A2_UNORM:
455         case WINED3DFMT_R8G8B8A8_UNORM:
456         case WINED3DFMT_R8G8B8X8_UNORM:
457         case WINED3DFMT_B10G10R10A2_UNORM:
458         case WINED3DFMT_B5G6R5_UNORM:
459         case WINED3DFMT_R16G16B16A16_UNORM:
460             usage = 0;
461             b_info->bmiHeader.biCompression = BI_BITFIELDS;
462             masks[0] = format->red_mask;
463             masks[1] = format->green_mask;
464             masks[2] = format->blue_mask;
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             usage = 0;
471             break;
472     }
473
474     if (!(dc = GetDC(0)))
475     {
476         HeapFree(GetProcessHeap(), 0, b_info);
477         return HRESULT_FROM_WIN32(GetLastError());
478     }
479
480     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
481             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
482             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
483     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
484     ReleaseDC(0, dc);
485
486     if (!surface->dib.DIBsection)
487     {
488         ERR("Failed to create DIB section.\n");
489         HeapFree(GetProcessHeap(), 0, b_info);
490         return HRESULT_FROM_WIN32(GetLastError());
491     }
492
493     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
494     /* Copy the existing surface to the dib section. */
495     if (surface->resource.allocatedMemory)
496     {
497         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
498                 surface->resource.height * wined3d_surface_get_pitch(surface));
499     }
500     else
501     {
502         /* This is to make maps read the GL texture although memory is allocated. */
503         surface->flags &= ~SFLAG_INSYSMEM;
504     }
505     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
506
507     HeapFree(GetProcessHeap(), 0, b_info);
508
509     /* Now allocate a DC. */
510     surface->hDC = CreateCompatibleDC(0);
511     SelectObject(surface->hDC, surface->dib.DIBsection);
512     TRACE("Using wined3d palette %p.\n", surface->palette);
513     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
514
515     surface->flags |= SFLAG_DIBSECTION;
516
517     return WINED3D_OK;
518 }
519
520 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
521 {
522     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
523         return FALSE;
524     if (!(surface->flags & SFLAG_DYNLOCK))
525         return FALSE;
526     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
527         return FALSE;
528     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
529         return FALSE;
530
531     return TRUE;
532 }
533
/* Create a pixel buffer object for the surface and move the current system
 * memory contents into it. Afterwards the surface has no system memory
 * pointer anymore and SFLAG_PBO is set.
 *
 * NOTE(review): gl_info is not referenced by name in this body; presumably
 * the GL_EXTCALL() macro resolves the extension entry points through it.
 * Confirm against the macro definition. */
static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
{
    struct wined3d_context *context;
    GLenum error;

    context = context_acquire(surface->resource.device, NULL);
    ENTER_GL();

    GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
    error = glGetError();
    /* A failure is only logged here; the code below runs regardless. */
    if (!surface->pbo || error != GL_NO_ERROR)
        ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);

    TRACE("Binding PBO %u.\n", surface->pbo);

    GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
    checkGLcall("glBindBufferARB");

    /* Allocate the buffer storage, 4 bytes larger than the surface, and
     * initialize it from the current system memory copy. */
    GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
            surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
    checkGLcall("glBufferDataARB");

    GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
    checkGLcall("glBindBufferARB");

    /* We don't need the system memory anymore and we can't even use it for PBOs. */
    if (!(surface->flags & SFLAG_CLIENT))
    {
        HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
        surface->resource.heapMemory = NULL;
    }
    surface->resource.allocatedMemory = NULL;
    surface->flags |= SFLAG_PBO;
    LEAVE_GL();
    context_release(context);
}
570
571 static void surface_prepare_system_memory(struct wined3d_surface *surface)
572 {
573     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
574
575     TRACE("surface %p.\n", surface);
576
577     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
578         surface_load_pbo(surface, gl_info);
579     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
580     {
581         /* Whatever surface we have, make sure that there is memory allocated
582          * for the downloaded copy, or a PBO to map. */
583         if (!surface->resource.heapMemory)
584             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
585
586         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
587                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
588
589         if (surface->flags & SFLAG_INSYSMEM)
590             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
591     }
592 }
593
594 static void surface_evict_sysmem(struct wined3d_surface *surface)
595 {
596     if (surface->flags & SFLAG_DONOTFREE)
597         return;
598
599     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
600     surface->resource.allocatedMemory = NULL;
601     surface->resource.heapMemory = NULL;
602     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
603 }
604
605 /* Context activation is done by the caller. */
606 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
607         struct wined3d_context *context, BOOL srgb)
608 {
609     struct wined3d_device *device = surface->resource.device;
610     DWORD active_sampler;
611
612     /* We don't need a specific texture unit, but after binding the texture
613      * the current unit is dirty. Read the unit back instead of switching to
614      * 0, this avoids messing around with the state manager's GL states. The
615      * current texture unit should always be a valid one.
616      *
617      * To be more specific, this is tricky because we can implicitly be
618      * called from sampler() in state.c. This means we can't touch anything
619      * other than whatever happens to be the currently active texture, or we
620      * would risk marking already applied sampler states dirty again. */
621     active_sampler = device->rev_tex_unit_map[context->active_texture];
622
623     if (active_sampler != WINED3D_UNMAPPED_STAGE)
624         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
625     surface_bind(surface, context, srgb);
626 }
627
628 static void surface_force_reload(struct wined3d_surface *surface)
629 {
630     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
631 }
632
/* Re-specify the surface's regular and sRGB GL textures (where they exist)
 * as 1x1 images with GL_UNPACK_CLIENT_STORAGE_APPLE disabled, then mark
 * both texture locations as out of date and clear the allocation flags. */
static void surface_release_client_storage(struct wined3d_surface *surface)
{
    struct wined3d_context *context = context_acquire(surface->resource.device, NULL);

    ENTER_GL();
    /* Client storage is switched off only for the re-specification below
     * and switched back on afterwards. */
    glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
    if (surface->texture_name)
    {
        surface_bind_and_dirtify(surface, context, FALSE);
        /* Replace the image with a 1x1 placeholder without any data. */
        glTexImage2D(surface->texture_target, surface->texture_level,
                GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
    }
    if (surface->texture_name_srgb)
    {
        surface_bind_and_dirtify(surface, context, TRUE);
        glTexImage2D(surface->texture_target, surface->texture_level,
                GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
    }
    glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
    LEAVE_GL();

    context_release(context);

    /* Neither texture holds the surface contents anymore. */
    surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
    surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
    surface_force_reload(surface);
}
660
661 static HRESULT surface_private_setup(struct wined3d_surface *surface)
662 {
663     /* TODO: Check against the maximum texture sizes supported by the video card. */
664     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
665     unsigned int pow2Width, pow2Height;
666
667     TRACE("surface %p.\n", surface);
668
669     surface->texture_name = 0;
670     surface->texture_target = GL_TEXTURE_2D;
671
672     /* Non-power2 support */
673     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
674     {
675         pow2Width = surface->resource.width;
676         pow2Height = surface->resource.height;
677     }
678     else
679     {
680         /* Find the nearest pow2 match */
681         pow2Width = pow2Height = 1;
682         while (pow2Width < surface->resource.width)
683             pow2Width <<= 1;
684         while (pow2Height < surface->resource.height)
685             pow2Height <<= 1;
686     }
687     surface->pow2Width = pow2Width;
688     surface->pow2Height = pow2Height;
689
690     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
691     {
692         /* TODO: Add support for non power two compressed textures. */
693         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
694         {
695             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
696                   surface, surface->resource.width, surface->resource.height);
697             return WINED3DERR_NOTAVAILABLE;
698         }
699     }
700
701     if (pow2Width != surface->resource.width
702             || pow2Height != surface->resource.height)
703     {
704         surface->flags |= SFLAG_NONPOW2;
705     }
706
707     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
708             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
709     {
710         /* One of three options:
711          * 1: Do the same as we do with NPOT and scale the texture, (any
712          *    texture ops would require the texture to be scaled which is
713          *    potentially slow)
714          * 2: Set the texture to the maximum size (bad idea).
715          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
716          * 4: Create the surface, but allow it to be used only for DirectDraw
717          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
718          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
719          *    the render target. */
720         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
721         {
722             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
723             return WINED3DERR_NOTAVAILABLE;
724         }
725
726         /* We should never use this surface in combination with OpenGL! */
727         TRACE("Creating an oversized surface: %ux%u.\n",
728                 surface->pow2Width, surface->pow2Height);
729     }
730     else
731     {
732         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
733          * and EXT_PALETTED_TEXTURE is used in combination with texture
734          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
735          * EXT_PALETTED_TEXTURE doesn't work in combination with
736          * ARB_TEXTURE_RECTANGLE. */
737         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
738                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
739                 && gl_info->supported[EXT_PALETTED_TEXTURE]
740                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
741         {
742             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
743             surface->pow2Width = surface->resource.width;
744             surface->pow2Height = surface->resource.height;
745             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
746         }
747     }
748
749     switch (wined3d_settings.offscreen_rendering_mode)
750     {
751         case ORM_FBO:
752             surface->get_drawable_size = get_drawable_size_fbo;
753             break;
754
755         case ORM_BACKBUFFER:
756             surface->get_drawable_size = get_drawable_size_backbuffer;
757             break;
758
759         default:
760             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
761             return WINED3DERR_INVALIDCALL;
762     }
763
764     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
765         surface->flags |= SFLAG_DISCARDED;
766
767     return WINED3D_OK;
768 }
769
770 static void surface_realize_palette(struct wined3d_surface *surface)
771 {
772     struct wined3d_palette *palette = surface->palette;
773
774     TRACE("surface %p.\n", surface);
775
776     if (!palette) return;
777
778     if (surface->resource.format->id == WINED3DFMT_P8_UINT
779             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
780     {
781         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
782         {
783             /* Make sure the texture is up to date. This call doesn't do
784              * anything if the texture is already up to date. */
785             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
786
787             /* We want to force a palette refresh, so mark the drawable as not being up to date */
788             if (!surface_is_offscreen(surface))
789                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
790         }
791         else
792         {
793             if (!(surface->flags & SFLAG_INSYSMEM))
794             {
795                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
796                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
797             }
798             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
799         }
800     }
801
802     if (surface->flags & SFLAG_DIBSECTION)
803     {
804         RGBQUAD col[256];
805         unsigned int i;
806
807         TRACE("Updating the DC's palette.\n");
808
809         for (i = 0; i < 256; ++i)
810         {
811             col[i].rgbRed   = palette->palents[i].peRed;
812             col[i].rgbGreen = palette->palents[i].peGreen;
813             col[i].rgbBlue  = palette->palents[i].peBlue;
814             col[i].rgbReserved = 0;
815         }
816         SetDIBColorTable(surface->hDC, 0, 256, col);
817     }
818
819     /* Propagate the changes to the drawable when we have a palette. */
820     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
821         surface_load_location(surface, surface->draw_binding, NULL);
822 }
823
824 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
825 {
826     HRESULT hr;
827
828     /* If there's no destination surface there is nothing to do. */
829     if (!surface->overlay_dest)
830         return WINED3D_OK;
831
832     /* Blt calls ModifyLocation on the dest surface, which in turn calls
833      * DrawOverlay to update the overlay. Prevent an endless recursion. */
834     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
835         return WINED3D_OK;
836
837     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
838     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
839             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
840     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
841
842     return hr;
843 }
844
845 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
846 {
847     struct wined3d_device *device = surface->resource.device;
848     const RECT *pass_rect = rect;
849
850     TRACE("surface %p, rect %s, flags %#x.\n",
851             surface, wine_dbgstr_rect(rect), flags);
852
853     if (flags & WINED3DLOCK_DISCARD)
854     {
855         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
856         surface_prepare_system_memory(surface);
857         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
858     }
859     else
860     {
861         /* surface_load_location() does not check if the rectangle specifies
862          * the full surface. Most callers don't need that, so do it here. */
863         if (rect && !rect->top && !rect->left
864                 && rect->right == surface->resource.width
865                 && rect->bottom == surface->resource.height)
866             pass_rect = NULL;
867         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
868     }
869
870     if (surface->flags & SFLAG_PBO)
871     {
872         const struct wined3d_gl_info *gl_info;
873         struct wined3d_context *context;
874
875         context = context_acquire(device, NULL);
876         gl_info = context->gl_info;
877
878         ENTER_GL();
879         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
880         checkGLcall("glBindBufferARB");
881
882         /* This shouldn't happen but could occur if some other function
883          * didn't handle the PBO properly. */
884         if (surface->resource.allocatedMemory)
885             ERR("The surface already has PBO memory allocated.\n");
886
887         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
888         checkGLcall("glMapBufferARB");
889
890         /* Make sure the PBO isn't set anymore in order not to break non-PBO
891          * calls. */
892         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
893         checkGLcall("glBindBufferARB");
894
895         LEAVE_GL();
896         context_release(context);
897     }
898
899     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
900     {
901         if (!rect)
902             surface_add_dirty_rect(surface, NULL);
903         else
904         {
905             struct wined3d_box b;
906
907             b.left = rect->left;
908             b.top = rect->top;
909             b.right = rect->right;
910             b.bottom = rect->bottom;
911             b.front = 0;
912             b.back = 1;
913             surface_add_dirty_rect(surface, &b);
914         }
915     }
916 }
917
918 static void surface_unmap(struct wined3d_surface *surface)
919 {
920     struct wined3d_device *device = surface->resource.device;
921     BOOL fullsurface;
922
923     TRACE("surface %p.\n", surface);
924
925     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
926
927     if (surface->flags & SFLAG_PBO)
928     {
929         const struct wined3d_gl_info *gl_info;
930         struct wined3d_context *context;
931
932         TRACE("Freeing PBO memory.\n");
933
934         context = context_acquire(device, NULL);
935         gl_info = context->gl_info;
936
937         ENTER_GL();
938         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
939         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
940         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
941         checkGLcall("glUnmapBufferARB");
942         LEAVE_GL();
943         context_release(context);
944
945         surface->resource.allocatedMemory = NULL;
946     }
947
948     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
949
950     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
951     {
952         TRACE("Not dirtified, nothing to do.\n");
953         goto done;
954     }
955
956     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
957             && surface->container.u.swapchain->front_buffer == surface)
958     {
959         if (!surface->dirtyRect.left && !surface->dirtyRect.top
960                 && surface->dirtyRect.right == surface->resource.width
961                 && surface->dirtyRect.bottom == surface->resource.height)
962         {
963             fullsurface = TRUE;
964         }
965         else
966         {
967             /* TODO: Proper partial rectangle tracking. */
968             fullsurface = FALSE;
969             surface->flags |= SFLAG_INSYSMEM;
970         }
971
972         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
973
974         /* Partial rectangle tracking is not commonly implemented, it is only
975          * done for render targets. INSYSMEM was set before to tell
976          * surface_load_location() where to read the rectangle from.
977          * Indrawable is set because all modifications from the partial
978          * sysmem copy are written back to the drawable, thus the surface is
979          * merged again in the drawable. The sysmem copy is not fully up to
980          * date because only a subrectangle was read in Map(). */
981         if (!fullsurface)
982         {
983             surface_modify_location(surface, surface->draw_binding, TRUE);
984             surface_evict_sysmem(surface);
985         }
986
987         surface->dirtyRect.left = surface->resource.width;
988         surface->dirtyRect.top = surface->resource.height;
989         surface->dirtyRect.right = 0;
990         surface->dirtyRect.bottom = 0;
991     }
992     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
993     {
994         FIXME("Depth / stencil buffer locking is not implemented.\n");
995     }
996
997 done:
998     /* Overlays have to be redrawn manually after changes with the GL implementation */
999     if (surface->overlay_dest)
1000         surface_draw_overlay(surface);
1001 }
1002
1003 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1004 {
1005     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1006         return FALSE;
1007     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1008         return FALSE;
1009     return TRUE;
1010 }
1011
1012 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1013         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1014 {
1015     const struct wined3d_gl_info *gl_info;
1016     struct wined3d_context *context;
1017     DWORD src_mask, dst_mask;
1018     GLbitfield gl_mask;
1019
1020     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1021             device, src_surface, wine_dbgstr_rect(src_rect),
1022             dst_surface, wine_dbgstr_rect(dst_rect));
1023
1024     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1025     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1026
1027     if (src_mask != dst_mask)
1028     {
1029         ERR("Incompatible formats %s and %s.\n",
1030                 debug_d3dformat(src_surface->resource.format->id),
1031                 debug_d3dformat(dst_surface->resource.format->id));
1032         return;
1033     }
1034
1035     if (!src_mask)
1036     {
1037         ERR("Not a depth / stencil format: %s.\n",
1038                 debug_d3dformat(src_surface->resource.format->id));
1039         return;
1040     }
1041
1042     gl_mask = 0;
1043     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1044         gl_mask |= GL_DEPTH_BUFFER_BIT;
1045     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1046         gl_mask |= GL_STENCIL_BUFFER_BIT;
1047
1048     /* Make sure the locations are up-to-date. Loading the destination
1049      * surface isn't required if the entire surface is overwritten. */
1050     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1051     if (!surface_is_full_rect(dst_surface, dst_rect))
1052         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1053
1054     context = context_acquire(device, NULL);
1055     if (!context->valid)
1056     {
1057         context_release(context);
1058         WARN("Invalid context, skipping blit.\n");
1059         return;
1060     }
1061
1062     gl_info = context->gl_info;
1063
1064     ENTER_GL();
1065
1066     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1067     glReadBuffer(GL_NONE);
1068     checkGLcall("glReadBuffer()");
1069     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1070
1071     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1072     context_set_draw_buffer(context, GL_NONE);
1073     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1074     context_invalidate_state(context, STATE_FRAMEBUFFER);
1075
1076     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1077     {
1078         glDepthMask(GL_TRUE);
1079         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1080     }
1081     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1082     {
1083         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1084         {
1085             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1086             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1087         }
1088         glStencilMask(~0U);
1089         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1090     }
1091
1092     glDisable(GL_SCISSOR_TEST);
1093     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1094
1095     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1096             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1097     checkGLcall("glBlitFramebuffer()");
1098
1099     LEAVE_GL();
1100
1101     if (wined3d_settings.strict_draw_ordering)
1102         wglFlush(); /* Flush to ensure ordering across contexts. */
1103
1104     context_release(context);
1105 }
1106
1107 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1108  * Depth / stencil is not supported. */
1109 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1110         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1111         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1112 {
1113     const struct wined3d_gl_info *gl_info;
1114     struct wined3d_context *context;
1115     RECT src_rect, dst_rect;
1116     GLenum gl_filter;
1117     GLenum buffer;
1118
1119     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1120     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1121             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1122     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1123             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1124
1125     src_rect = *src_rect_in;
1126     dst_rect = *dst_rect_in;
1127
1128     switch (filter)
1129     {
1130         case WINED3D_TEXF_LINEAR:
1131             gl_filter = GL_LINEAR;
1132             break;
1133
1134         default:
1135             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1136         case WINED3D_TEXF_NONE:
1137         case WINED3D_TEXF_POINT:
1138             gl_filter = GL_NEAREST;
1139             break;
1140     }
1141
1142     /* Resolve the source surface first if needed. */
1143     if (src_location == SFLAG_INRB_MULTISAMPLE
1144             && (src_surface->resource.format->id != dst_surface->resource.format->id
1145                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1146                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1147         src_location = SFLAG_INRB_RESOLVED;
1148
1149     /* Make sure the locations are up-to-date. Loading the destination
1150      * surface isn't required if the entire surface is overwritten. (And is
1151      * in fact harmful if we're being called by surface_load_location() with
1152      * the purpose of loading the destination surface.) */
1153     surface_load_location(src_surface, src_location, NULL);
1154     if (!surface_is_full_rect(dst_surface, &dst_rect))
1155         surface_load_location(dst_surface, dst_location, NULL);
1156
1157     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1158     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1159     else context = context_acquire(device, NULL);
1160
1161     if (!context->valid)
1162     {
1163         context_release(context);
1164         WARN("Invalid context, skipping blit.\n");
1165         return;
1166     }
1167
1168     gl_info = context->gl_info;
1169
1170     if (src_location == SFLAG_INDRAWABLE)
1171     {
1172         TRACE("Source surface %p is onscreen.\n", src_surface);
1173         buffer = surface_get_gl_buffer(src_surface);
1174         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1175     }
1176     else
1177     {
1178         TRACE("Source surface %p is offscreen.\n", src_surface);
1179         buffer = GL_COLOR_ATTACHMENT0;
1180     }
1181
1182     ENTER_GL();
1183     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1184     glReadBuffer(buffer);
1185     checkGLcall("glReadBuffer()");
1186     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1187     LEAVE_GL();
1188
1189     if (dst_location == SFLAG_INDRAWABLE)
1190     {
1191         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1192         buffer = surface_get_gl_buffer(dst_surface);
1193         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1194     }
1195     else
1196     {
1197         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1198         buffer = GL_COLOR_ATTACHMENT0;
1199     }
1200
1201     ENTER_GL();
1202     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1203     context_set_draw_buffer(context, buffer);
1204     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1205     context_invalidate_state(context, STATE_FRAMEBUFFER);
1206
1207     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1208     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1209     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1210     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1211     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1212
1213     glDisable(GL_SCISSOR_TEST);
1214     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1215
1216     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1217             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1218     checkGLcall("glBlitFramebuffer()");
1219
1220     LEAVE_GL();
1221
1222     if (wined3d_settings.strict_draw_ordering
1223             || (dst_location == SFLAG_INDRAWABLE
1224             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1225         wglFlush();
1226
1227     context_release(context);
1228 }
1229
1230 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1231         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1232         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1233 {
1234     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1235         return FALSE;
1236
1237     /* Source and/or destination need to be on the GL side */
1238     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1239         return FALSE;
1240
1241     switch (blit_op)
1242     {
1243         case WINED3D_BLIT_OP_COLOR_BLIT:
1244             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1245                 return FALSE;
1246             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1247                 return FALSE;
1248             break;
1249
1250         case WINED3D_BLIT_OP_DEPTH_BLIT:
1251             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1252                 return FALSE;
1253             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1254                 return FALSE;
1255             break;
1256
1257         default:
1258             return FALSE;
1259     }
1260
1261     if (!(src_format->id == dst_format->id
1262             || (is_identity_fixup(src_format->color_fixup)
1263             && is_identity_fixup(dst_format->color_fixup))))
1264         return FALSE;
1265
1266     return TRUE;
1267 }
1268
1269 /* This function checks if the primary render target uses the 8bit paletted format. */
1270 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1271 {
1272     if (device->fb.render_targets && device->fb.render_targets[0])
1273     {
1274         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1275         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1276                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1277             return TRUE;
1278     }
1279     return FALSE;
1280 }
1281
1282 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1283         DWORD color, struct wined3d_color *float_color)
1284 {
1285     const struct wined3d_format *format = surface->resource.format;
1286     const struct wined3d_device *device = surface->resource.device;
1287
1288     switch (format->id)
1289     {
1290         case WINED3DFMT_P8_UINT:
1291             if (surface->palette)
1292             {
1293                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1294                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1295                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1296             }
1297             else
1298             {
1299                 float_color->r = 0.0f;
1300                 float_color->g = 0.0f;
1301                 float_color->b = 0.0f;
1302             }
1303             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1304             break;
1305
1306         case WINED3DFMT_B5G6R5_UNORM:
1307             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1308             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1309             float_color->b = (color & 0x1f) / 31.0f;
1310             float_color->a = 1.0f;
1311             break;
1312
1313         case WINED3DFMT_B8G8R8_UNORM:
1314         case WINED3DFMT_B8G8R8X8_UNORM:
1315             float_color->r = D3DCOLOR_R(color);
1316             float_color->g = D3DCOLOR_G(color);
1317             float_color->b = D3DCOLOR_B(color);
1318             float_color->a = 1.0f;
1319             break;
1320
1321         case WINED3DFMT_B8G8R8A8_UNORM:
1322             float_color->r = D3DCOLOR_R(color);
1323             float_color->g = D3DCOLOR_G(color);
1324             float_color->b = D3DCOLOR_B(color);
1325             float_color->a = D3DCOLOR_A(color);
1326             break;
1327
1328         default:
1329             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1330             return FALSE;
1331     }
1332
1333     return TRUE;
1334 }
1335
1336 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1337 {
1338     const struct wined3d_format *format = surface->resource.format;
1339
1340     switch (format->id)
1341     {
1342         case WINED3DFMT_S1_UINT_D15_UNORM:
1343             *float_depth = depth / (float)0x00007fff;
1344             break;
1345
1346         case WINED3DFMT_D16_UNORM:
1347             *float_depth = depth / (float)0x0000ffff;
1348             break;
1349
1350         case WINED3DFMT_D24_UNORM_S8_UINT:
1351         case WINED3DFMT_X8D24_UNORM:
1352             *float_depth = depth / (float)0x00ffffff;
1353             break;
1354
1355         case WINED3DFMT_D32_UNORM:
1356             *float_depth = depth / (float)0xffffffff;
1357             break;
1358
1359         default:
1360             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1361             return FALSE;
1362     }
1363
1364     return TRUE;
1365 }
1366
1367 /* Do not call while under the GL lock. */
1368 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1369 {
1370     const struct wined3d_resource *resource = &surface->resource;
1371     struct wined3d_device *device = resource->device;
1372     const struct blit_shader *blitter;
1373
1374     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1375             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1376     if (!blitter)
1377     {
1378         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1379         return WINED3DERR_INVALIDCALL;
1380     }
1381
1382     return blitter->depth_fill(device, surface, rect, depth);
1383 }
1384
1385 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1386         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1387 {
1388     struct wined3d_device *device = src_surface->resource.device;
1389
1390     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1391             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1392             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1393         return WINED3DERR_INVALIDCALL;
1394
1395     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1396
1397     surface_modify_ds_location(dst_surface, SFLAG_INTEXTURE,
1398             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1399
1400     return WINED3D_OK;
1401 }
1402
1403 /* Do not call while under the GL lock. */
1404 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1405         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1406         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1407 {
1408     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1409     struct wined3d_device *device = dst_surface->resource.device;
1410     DWORD src_ds_flags, dst_ds_flags;
1411     RECT src_rect, dst_rect;
1412     BOOL scale, convert;
1413
1414     static const DWORD simple_blit = WINEDDBLT_ASYNC
1415             | WINEDDBLT_COLORFILL
1416             | WINEDDBLT_WAIT
1417             | WINEDDBLT_DEPTHFILL
1418             | WINEDDBLT_DONOTWAIT;
1419
1420     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1421             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1422             flags, fx, debug_d3dtexturefiltertype(filter));
1423     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1424
1425     if (fx)
1426     {
1427         TRACE("dwSize %#x.\n", fx->dwSize);
1428         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1429         TRACE("dwROP %#x.\n", fx->dwROP);
1430         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1431         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1432         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1433         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1434         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1435         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1436         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1437         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1438         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1439         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1440         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1441         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1442         TRACE("dwReserved %#x.\n", fx->dwReserved);
1443         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1444         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1445         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1446         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1447         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1448         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1449                 fx->ddckDestColorkey.color_space_low_value,
1450                 fx->ddckDestColorkey.color_space_high_value);
1451         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1452                 fx->ddckSrcColorkey.color_space_low_value,
1453                 fx->ddckSrcColorkey.color_space_high_value);
1454     }
1455
1456     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1457     {
1458         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1459         return WINEDDERR_SURFACEBUSY;
1460     }
1461
1462     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1463
1464     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1465             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1466             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1467             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1468             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1469     {
1470         WARN("The application gave us a bad destination rectangle.\n");
1471         return WINEDDERR_INVALIDRECT;
1472     }
1473
1474     if (src_surface)
1475     {
1476         surface_get_rect(src_surface, src_rect_in, &src_rect);
1477
1478         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1479                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1480                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1481                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1482                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1483         {
1484             WARN("Application gave us bad source rectangle for Blt.\n");
1485             return WINEDDERR_INVALIDRECT;
1486         }
1487     }
1488     else
1489     {
1490         memset(&src_rect, 0, sizeof(src_rect));
1491     }
1492
1493     if (!fx || !(fx->dwDDFX))
1494         flags &= ~WINEDDBLT_DDFX;
1495
1496     if (flags & WINEDDBLT_WAIT)
1497         flags &= ~WINEDDBLT_WAIT;
1498
1499     if (flags & WINEDDBLT_ASYNC)
1500     {
1501         static unsigned int once;
1502
1503         if (!once++)
1504             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1505         flags &= ~WINEDDBLT_ASYNC;
1506     }
1507
1508     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1509     if (flags & WINEDDBLT_DONOTWAIT)
1510     {
1511         static unsigned int once;
1512
1513         if (!once++)
1514             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1515         flags &= ~WINEDDBLT_DONOTWAIT;
1516     }
1517
1518     if (!device->d3d_initialized)
1519     {
1520         WARN("D3D not initialized, using fallback.\n");
1521         goto cpu;
1522     }
1523
1524     /* We want to avoid invalidating the sysmem location for converted
1525      * surfaces, since otherwise we'd have to convert the data back when
1526      * locking them. */
1527     if (dst_surface->flags & SFLAG_CONVERTED)
1528     {
1529         WARN("Converted surface, using CPU blit.\n");
1530         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1531     }
1532
1533     if (flags & ~simple_blit)
1534     {
1535         WARN("Using fallback for complex blit (%#x).\n", flags);
1536         goto fallback;
1537     }
1538
1539     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1540         src_swapchain = src_surface->container.u.swapchain;
1541     else
1542         src_swapchain = NULL;
1543
1544     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1545         dst_swapchain = dst_surface->container.u.swapchain;
1546     else
1547         dst_swapchain = NULL;
1548
1549     /* This isn't strictly needed. FBO blits for example could deal with
1550      * cross-swapchain blits by first downloading the source to a texture
1551      * before switching to the destination context. We just have this here to
1552      * not have to deal with the issue, since cross-swapchain blits should be
1553      * rare. */
1554     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1555     {
1556         FIXME("Using fallback for cross-swapchain blit.\n");
1557         goto fallback;
1558     }
1559
1560     scale = src_surface
1561             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1562             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1563     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1564
1565     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1566     if (src_surface)
1567         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1568     else
1569         src_ds_flags = 0;
1570
1571     if (src_ds_flags || dst_ds_flags)
1572     {
1573         if (flags & WINEDDBLT_DEPTHFILL)
1574         {
1575             float depth;
1576
1577             TRACE("Depth fill.\n");
1578
1579             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1580                 return WINED3DERR_INVALIDCALL;
1581
1582             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1583                 return WINED3D_OK;
1584         }
1585         else
1586         {
1587             if (src_ds_flags != dst_ds_flags)
1588             {
1589                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1590                 return WINED3DERR_INVALIDCALL;
1591             }
1592
1593             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1594                 return WINED3D_OK;
1595         }
1596     }
1597     else
1598     {
1599         /* In principle this would apply to depth blits as well, but we don't
1600          * implement those in the CPU blitter at the moment. */
1601         if ((dst_surface->flags & SFLAG_INSYSMEM)
1602                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1603         {
1604             if (scale)
1605                 TRACE("Not doing sysmem blit because of scaling.\n");
1606             else if (convert)
1607                 TRACE("Not doing sysmem blit because of format conversion.\n");
1608             else
1609                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1610         }
1611
1612         if (flags & WINEDDBLT_COLORFILL)
1613         {
1614             struct wined3d_color color;
1615
1616             TRACE("Color fill.\n");
1617
1618             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1619                 goto fallback;
1620
1621             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1622                 return WINED3D_OK;
1623         }
1624         else
1625         {
1626             TRACE("Color blit.\n");
1627
1628             /* Upload */
1629             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1630             {
1631                 if (scale)
1632                     TRACE("Not doing upload because of scaling.\n");
1633                 else if (convert)
1634                     TRACE("Not doing upload because of format conversion.\n");
1635                 else
1636                 {
1637                     POINT dst_point = {dst_rect.left, dst_rect.top};
1638
1639                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1640                     {
1641                         if (!surface_is_offscreen(dst_surface))
1642                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1643                         return WINED3D_OK;
1644                     }
1645                 }
1646             }
1647
1648             /* Use present for back -> front blits. The idea behind this is
1649              * that present is potentially faster than a blit, in particular
1650              * when FBO blits aren't available. Some ddraw applications like
1651              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1652              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1653              * applications can't blit directly to the frontbuffer. */
1654             if (dst_swapchain && dst_swapchain->back_buffers
1655                     && dst_surface == dst_swapchain->front_buffer
1656                     && src_surface == dst_swapchain->back_buffers[0])
1657             {
1658                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1659
1660                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1661
1662                 /* Set the swap effect to COPY, we don't want the backbuffer
1663                  * to become undefined. */
1664                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1665                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1666                 dst_swapchain->desc.swap_effect = swap_effect;
1667
1668                 return WINED3D_OK;
1669             }
1670
1671             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1672                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1673                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1674             {
1675                 TRACE("Using FBO blit.\n");
1676
1677                 surface_blt_fbo(device, filter,
1678                         src_surface, src_surface->draw_binding, &src_rect,
1679                         dst_surface, dst_surface->draw_binding, &dst_rect);
1680                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1681                 return WINED3D_OK;
1682             }
1683
1684             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1685                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1686                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1687             {
1688                 TRACE("Using arbfp blit.\n");
1689
1690                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1691                     return WINED3D_OK;
1692             }
1693         }
1694     }
1695
1696 fallback:
1697
1698     /* Special cases for render targets. */
1699     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1700             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1701     {
1702         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1703                 src_surface, &src_rect, flags, fx, filter)))
1704             return WINED3D_OK;
1705     }
1706
1707 cpu:
1708
1709     /* For the rest call the X11 surface implementation. For render targets
1710      * this should be implemented OpenGL accelerated in BltOverride, other
1711      * blits are rather rare. */
1712     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1713 }
1714
1715 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1716         struct wined3d_surface *render_target)
1717 {
1718     TRACE("surface %p, render_target %p.\n", surface, render_target);
1719
1720     /* TODO: Check surface sizes, pools, etc. */
1721
1722     if (render_target->resource.multisample_type)
1723         return WINED3DERR_INVALIDCALL;
1724
1725     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1726 }
1727
1728 /* Context activation is done by the caller. */
1729 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1730 {
1731     if (surface->flags & SFLAG_DIBSECTION)
1732     {
1733         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1734     }
1735     else
1736     {
1737         if (!surface->resource.heapMemory)
1738             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1739         else if (!(surface->flags & SFLAG_CLIENT))
1740             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1741                     surface, surface->resource.heapMemory, surface->flags);
1742
1743         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1744                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1745     }
1746
1747     ENTER_GL();
1748     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1749     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1750     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1751             surface->resource.size, surface->resource.allocatedMemory));
1752     checkGLcall("glGetBufferSubDataARB");
1753     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1754     checkGLcall("glDeleteBuffersARB");
1755     LEAVE_GL();
1756
1757     surface->pbo = 0;
1758     surface->flags &= ~SFLAG_PBO;
1759 }
1760
/* Resource unload handler for surfaces.
 *
 * Drops the texture locations of the surface, removes its PBO (after reading
 * it back), deletes its renderbuffers and, for surfaces that are not part of
 * a texture, its GL texture names, and finally calls resource_unload().
 *
 * Do not call while under the GL lock. */
static void surface_unload(struct wined3d_resource *resource)
{
    struct wined3d_surface *surface = surface_from_resource(resource);
    struct wined3d_renderbuffer_entry *entry, *entry2;
    struct wined3d_device *device = resource->device;
    const struct wined3d_gl_info *gl_info;
    struct wined3d_context *context;

    TRACE("surface %p.\n", surface);

    if (resource->pool == WINED3D_POOL_DEFAULT)
    {
        /* Default pool resources are supposed to be destroyed before Reset is called.
         * Implicit resources stay however. So this means we have an implicit render target
         * or depth stencil. The content may be destroyed, but we still have to tear down
         * OpenGL resources, so we cannot leave early.
         *
         * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
         * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
         * or the depth stencil into an FBO the texture or render buffer will be removed
         * and all flags get lost.
         */
        if (!(surface->flags & SFLAG_PBO))
            surface_init_sysmem(surface);
        /* We also get here when the ddraw swapchain is destroyed, for example
         * for a mode switch. In this case this surface won't necessarily be
         * an implicit surface. We have to mark it lost so that the
         * application can restore it after the mode switch. */
        surface->flags |= SFLAG_LOST;
    }
    else
    {
        /* Load the surface into system memory, then drop the draw binding
         * location. */
        surface_load_location(surface, SFLAG_INSYSMEM, NULL);
        surface_modify_location(surface, surface->draw_binding, FALSE);
    }
    /* Neither texture location is valid any more, and the texture storage
     * allocation flags are cleared along with them. */
    surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
    surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
    surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);

    context = context_acquire(device, NULL);
    gl_info = context->gl_info;

    /* Destroy PBOs, but load them into real sysmem before */
    if (surface->flags & SFLAG_PBO)
        surface_remove_pbo(surface, gl_info);

    /* Destroy fbo render buffers. This is needed for implicit render targets, for
     * all application-created targets the application has to release the surface
     * before calling _Reset
     */
    LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
    {
        ENTER_GL();
        gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
        LEAVE_GL();
        list_remove(&entry->entry);
        HeapFree(GetProcessHeap(), 0, entry);
    }
    /* Every entry has been removed above; reinitialise the list head and
     * forget the renderbuffer that was in use. */
    list_init(&surface->renderbuffers);
    surface->current_renderbuffer = NULL;

    ENTER_GL();

    /* If we're in a texture, the texture name belongs to the texture.
     * Otherwise, destroy it. */
    if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
    {
        glDeleteTextures(1, &surface->texture_name);
        surface->texture_name = 0;
        glDeleteTextures(1, &surface->texture_name_srgb);
        surface->texture_name_srgb = 0;
    }
    /* Delete the rb_multisample and rb_resolved renderbuffers, if any. */
    if (surface->rb_multisample)
    {
        gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
        surface->rb_multisample = 0;
    }
    if (surface->rb_resolved)
    {
        gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
        surface->rb_resolved = 0;
    }

    LEAVE_GL();

    context_release(context);

    resource_unload(resource);
}
1852
/* Resource operations table for surfaces. Its single entry is
 * surface_unload(). */
static const struct wined3d_resource_ops surface_resource_ops =
{
    surface_unload,
};
1857
/* Surface operations table built from the surface_*() handlers. The entries
 * are positional; judging by the function names they are the private setup,
 * palette realization, map and unmap handlers, in that order (confirm
 * against the struct wined3d_surface_ops declaration). */
static const struct wined3d_surface_ops surface_ops =
{
    surface_private_setup,
    surface_realize_palette,
    surface_map,
    surface_unmap,
};
1865
1866 /*****************************************************************************
1867  * Initializes the GDI surface, aka creates the DIB section we render to
1868  * The DIB section creation is done by calling GetDC, which will create the
1869  * section and releasing the dc to allow the app to use it. The dib section
1870  * will stay until the surface is released
1871  *
1872  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1873  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1874  * avoid confusion in the shared surface code.
1875  *
1876  * Returns:
1877  *  WINED3D_OK on success
1878  *  The return values of called methods on failure
1879  *
1880  *****************************************************************************/
1881 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1882 {
1883     HRESULT hr;
1884
1885     TRACE("surface %p.\n", surface);
1886
1887     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1888     {
1889         ERR("Overlays not yet supported by GDI surfaces.\n");
1890         return WINED3DERR_INVALIDCALL;
1891     }
1892
1893     /* Sysmem textures have memory already allocated - release it,
1894      * this avoids an unnecessary memcpy. */
1895     hr = surface_create_dib_section(surface);
1896     if (SUCCEEDED(hr))
1897     {
1898         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1899         surface->resource.heapMemory = NULL;
1900         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1901     }
1902
1903     /* We don't mind the nonpow2 stuff in GDI. */
1904     surface->pow2Width = surface->resource.width;
1905     surface->pow2Height = surface->resource.height;
1906
1907     return WINED3D_OK;
1908 }
1909
1910 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1911 {
1912     struct wined3d_palette *palette = surface->palette;
1913
1914     TRACE("surface %p.\n", surface);
1915
1916     if (!palette) return;
1917
1918     if (surface->flags & SFLAG_DIBSECTION)
1919     {
1920         RGBQUAD col[256];
1921         unsigned int i;
1922
1923         TRACE("Updating the DC's palette.\n");
1924
1925         for (i = 0; i < 256; ++i)
1926         {
1927             col[i].rgbRed = palette->palents[i].peRed;
1928             col[i].rgbGreen = palette->palents[i].peGreen;
1929             col[i].rgbBlue = palette->palents[i].peBlue;
1930             col[i].rgbReserved = 0;
1931         }
1932         SetDIBColorTable(surface->hDC, 0, 256, col);
1933     }
1934
1935     /* Update the image because of the palette change. Some games like e.g.
1936      * Red Alert call SetEntries a lot to implement fading. */
1937     /* Tell the swapchain to update the screen. */
1938     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1939     {
1940         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1941         if (surface == swapchain->front_buffer)
1942         {
1943             x11_copy_to_screen(swapchain, NULL);
1944         }
1945     }
1946 }
1947
1948 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1949 {
1950     TRACE("surface %p, rect %s, flags %#x.\n",
1951             surface, wine_dbgstr_rect(rect), flags);
1952
1953     if (!(surface->flags & SFLAG_DIBSECTION))
1954     {
1955         HRESULT hr;
1956
1957         /* This happens on gdi surfaces if the application set a user pointer
1958          * and resets it. Recreate the DIB section. */
1959         if (FAILED(hr = surface_create_dib_section(surface)))
1960         {
1961             ERR("Failed to create dib section, hr %#x.\n", hr);
1962             return;
1963         }
1964         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1965         surface->resource.heapMemory = NULL;
1966         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1967     }
1968 }
1969
1970 static void gdi_surface_unmap(struct wined3d_surface *surface)
1971 {
1972     TRACE("surface %p.\n", surface);
1973
1974     /* Tell the swapchain to update the screen. */
1975     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1976     {
1977         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1978         if (surface == swapchain->front_buffer)
1979         {
1980             x11_copy_to_screen(swapchain, &surface->lockedRect);
1981         }
1982     }
1983
1984     memset(&surface->lockedRect, 0, sizeof(RECT));
1985 }
1986
/* Surface operations table built from the gdi_surface_*() handlers. The
 * entries are positional; judging by the function names they are the private
 * setup, palette realization, map and unmap handlers, in that order (confirm
 * against the struct wined3d_surface_ops declaration). */
static const struct wined3d_surface_ops gdi_surface_ops =
{
    gdi_surface_private_setup,
    gdi_surface_realize_palette,
    gdi_surface_map,
    gdi_surface_unmap,
};
1994
1995 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
1996 {
1997     GLuint *name;
1998     DWORD flag;
1999
2000     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2001
2002     if(srgb)
2003     {
2004         name = &surface->texture_name_srgb;
2005         flag = SFLAG_INSRGBTEX;
2006     }
2007     else
2008     {
2009         name = &surface->texture_name;
2010         flag = SFLAG_INTEXTURE;
2011     }
2012
2013     if (!*name && new_name)
2014     {
2015         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2016          * surface has no texture name yet. See if we can get rid of this. */
2017         if (surface->flags & flag)
2018         {
2019             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2020             surface_modify_location(surface, flag, FALSE);
2021         }
2022     }
2023
2024     *name = new_name;
2025     surface_force_reload(surface);
2026 }
2027
2028 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2029 {
2030     TRACE("surface %p, target %#x.\n", surface, target);
2031
2032     if (surface->texture_target != target)
2033     {
2034         if (target == GL_TEXTURE_RECTANGLE_ARB)
2035         {
2036             surface->flags &= ~SFLAG_NORMCOORD;
2037         }
2038         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2039         {
2040             surface->flags |= SFLAG_NORMCOORD;
2041         }
2042     }
2043     surface->texture_target = target;
2044     surface_force_reload(surface);
2045 }
2046
2047 /* Context activation is done by the caller. */
2048 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2049 {
2050     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2051
2052     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2053     {
2054         struct wined3d_texture *texture = surface->container.u.texture;
2055
2056         TRACE("Passing to container (%p).\n", texture);
2057         texture->texture_ops->texture_bind(texture, context, srgb);
2058     }
2059     else
2060     {
2061         if (surface->texture_level)
2062         {
2063             ERR("Standalone surface %p is non-zero texture level %u.\n",
2064                     surface, surface->texture_level);
2065         }
2066
2067         if (srgb)
2068             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2069
2070         ENTER_GL();
2071
2072         if (!surface->texture_name)
2073         {
2074             glGenTextures(1, &surface->texture_name);
2075             checkGLcall("glGenTextures");
2076
2077             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2078
2079             context_bind_texture(context, surface->texture_target, surface->texture_name);
2080             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2081             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2082             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2083             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2084             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2085             checkGLcall("glTexParameteri");
2086         }
2087         else
2088         {
2089             context_bind_texture(context, surface->texture_target, surface->texture_name);
2090         }
2091
2092         LEAVE_GL();
2093     }
2094 }
2095
2096 /* This call just downloads data, the caller is responsible for binding the
2097  * correct texture. */
2098 /* Context activation is done by the caller. */
2099 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2100 {
2101     const struct wined3d_format *format = surface->resource.format;
2102
2103     /* Only support read back of converted P8 surfaces. */
2104     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2105     {
2106         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2107         return;
2108     }
2109
2110     ENTER_GL();
2111
2112     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2113     {
2114         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2115                 surface, surface->texture_level, format->glFormat, format->glType,
2116                 surface->resource.allocatedMemory);
2117
2118         if (surface->flags & SFLAG_PBO)
2119         {
2120             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2121             checkGLcall("glBindBufferARB");
2122             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2123             checkGLcall("glGetCompressedTexImageARB");
2124             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2125             checkGLcall("glBindBufferARB");
2126         }
2127         else
2128         {
2129             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2130                     surface->texture_level, surface->resource.allocatedMemory));
2131             checkGLcall("glGetCompressedTexImageARB");
2132         }
2133
2134         LEAVE_GL();
2135     }
2136     else
2137     {
2138         void *mem;
2139         GLenum gl_format = format->glFormat;
2140         GLenum gl_type = format->glType;
2141         int src_pitch = 0;
2142         int dst_pitch = 0;
2143
2144         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2145         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2146         {
2147             gl_format = GL_ALPHA;
2148             gl_type = GL_UNSIGNED_BYTE;
2149         }
2150
2151         if (surface->flags & SFLAG_NONPOW2)
2152         {
2153             unsigned char alignment = surface->resource.device->surface_alignment;
2154             src_pitch = format->byte_count * surface->pow2Width;
2155             dst_pitch = wined3d_surface_get_pitch(surface);
2156             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2157             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2158         }
2159         else
2160         {
2161             mem = surface->resource.allocatedMemory;
2162         }
2163
2164         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2165                 surface, surface->texture_level, gl_format, gl_type, mem);
2166
2167         if (surface->flags & SFLAG_PBO)
2168         {
2169             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2170             checkGLcall("glBindBufferARB");
2171
2172             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2173             checkGLcall("glGetTexImage");
2174
2175             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2176             checkGLcall("glBindBufferARB");
2177         }
2178         else
2179         {
2180             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2181             checkGLcall("glGetTexImage");
2182         }
2183         LEAVE_GL();
2184
2185         if (surface->flags & SFLAG_NONPOW2)
2186         {
2187             const BYTE *src_data;
2188             BYTE *dst_data;
2189             UINT y;
2190             /*
2191              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2192              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2193              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2194              *
2195              * We're doing this...
2196              *
2197              * instead of boxing the texture :
2198              * |<-texture width ->|  -->pow2width|   /\
2199              * |111111111111111111|              |   |
2200              * |222 Texture 222222| boxed empty  | texture height
2201              * |3333 Data 33333333|              |   |
2202              * |444444444444444444|              |   \/
2203              * -----------------------------------   |
2204              * |     boxed  empty | boxed empty  | pow2height
2205              * |                  |              |   \/
2206              * -----------------------------------
2207              *
2208              *
2209              * we're repacking the data to the expected texture width
2210              *
2211              * |<-texture width ->|  -->pow2width|   /\
2212              * |111111111111111111222222222222222|   |
2213              * |222333333333333333333444444444444| texture height
2214              * |444444                           |   |
2215              * |                                 |   \/
2216              * |                                 |   |
2217              * |            empty                | pow2height
2218              * |                                 |   \/
2219              * -----------------------------------
2220              *
2221              * == is the same as
2222              *
2223              * |<-texture width ->|    /\
2224              * |111111111111111111|
2225              * |222222222222222222|texture height
2226              * |333333333333333333|
2227              * |444444444444444444|    \/
2228              * --------------------
2229              *
2230              * this also means that any references to allocatedMemory should work with the data as if were a
2231              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2232              *
2233              * internally the texture is still stored in a boxed format so any references to textureName will
2234              * get a boxed texture with width pow2width and not a texture of width resource.width.
2235              *
2236              * Performance should not be an issue, because applications normally do not lock the surfaces when
2237              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2238              * and doesn't have to be re-read. */
2239             src_data = mem;
2240             dst_data = surface->resource.allocatedMemory;
2241             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2242             for (y = 1; y < surface->resource.height; ++y)
2243             {
2244                 /* skip the first row */
2245                 src_data += src_pitch;
2246                 dst_data += dst_pitch;
2247                 memcpy(dst_data, src_data, dst_pitch);
2248             }
2249
2250             HeapFree(GetProcessHeap(), 0, mem);
2251         }
2252     }
2253
2254     /* Surface has now been downloaded */
2255     surface->flags |= SFLAG_INSYSMEM;
2256 }
2257
2258 /* This call just uploads data, the caller is responsible for binding the
2259  * correct texture. */
2260 /* Context activation is done by the caller. */
2261 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2262         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2263         BOOL srgb, const struct wined3d_bo_address *data)
2264 {
2265     UINT update_w = src_rect->right - src_rect->left;
2266     UINT update_h = src_rect->bottom - src_rect->top;
2267
2268     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2269             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2270             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2271
2272     if (surface->flags & SFLAG_LOCKED)
2273     {
2274         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2275         surface->flags |= SFLAG_PIN_SYSMEM;
2276     }
2277
2278     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2279     {
2280         update_h *= format->height_scale.numerator;
2281         update_h /= format->height_scale.denominator;
2282     }
2283
2284     ENTER_GL();
2285
2286     if (data->buffer_object)
2287     {
2288         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2289         checkGLcall("glBindBufferARB");
2290     }
2291
2292     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2293     {
2294         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2295         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2296         const BYTE *addr = data->addr;
2297         GLenum internal;
2298
2299         addr += (src_rect->top / format->block_height) * src_pitch;
2300         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2301
2302         if (srgb)
2303             internal = format->glGammaInternal;
2304         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2305             internal = format->rtInternal;
2306         else
2307             internal = format->glInternal;
2308
2309         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2310                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2311                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2312
2313         if (row_length == src_pitch)
2314         {
2315             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2316                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2317         }
2318         else
2319         {
2320             UINT row, y;
2321
2322             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2323              * can't use the unpack row length like below. */
2324             for (row = 0, y = dst_point->y; row < row_count; ++row)
2325             {
2326                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2327                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2328                 y += format->block_height;
2329                 addr += src_pitch;
2330             }
2331         }
2332         checkGLcall("glCompressedTexSubImage2DARB");
2333     }
2334     else
2335     {
2336         const BYTE *addr = data->addr;
2337
2338         addr += src_rect->top * src_pitch;
2339         addr += src_rect->left * format->byte_count;
2340
2341         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2342                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2343                 update_w, update_h, format->glFormat, format->glType, addr);
2344
2345         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2346         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2347                 update_w, update_h, format->glFormat, format->glType, addr);
2348         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2349         checkGLcall("glTexSubImage2D");
2350     }
2351
2352     if (data->buffer_object)
2353     {
2354         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2355         checkGLcall("glBindBufferARB");
2356     }
2357
2358     LEAVE_GL();
2359
2360     if (wined3d_settings.strict_draw_ordering)
2361         wglFlush();
2362
2363     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2364     {
2365         struct wined3d_device *device = surface->resource.device;
2366         unsigned int i;
2367
2368         for (i = 0; i < device->context_count; ++i)
2369         {
2370             context_surface_update(device->contexts[i], surface);
2371         }
2372     }
2373 }
2374
2375 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2376         struct wined3d_surface *src_surface, const RECT *src_rect)
2377 {
2378     const struct wined3d_format *src_format;
2379     const struct wined3d_format *dst_format;
2380     const struct wined3d_gl_info *gl_info;
2381     enum wined3d_conversion_type convert;
2382     struct wined3d_context *context;
2383     struct wined3d_bo_address data;
2384     struct wined3d_format format;
2385     UINT update_w, update_h;
2386     UINT dst_w, dst_h;
2387     UINT src_w, src_h;
2388     UINT src_pitch;
2389     POINT p;
2390     RECT r;
2391
2392     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2393             dst_surface, wine_dbgstr_point(dst_point),
2394             src_surface, wine_dbgstr_rect(src_rect));
2395
2396     src_format = src_surface->resource.format;
2397     dst_format = dst_surface->resource.format;
2398
2399     if (src_format->id != dst_format->id)
2400     {
2401         WARN("Source and destination surfaces should have the same format.\n");
2402         return WINED3DERR_INVALIDCALL;
2403     }
2404
2405     if (!dst_point)
2406     {
2407         p.x = 0;
2408         p.y = 0;
2409         dst_point = &p;
2410     }
2411     else if (dst_point->x < 0 || dst_point->y < 0)
2412     {
2413         WARN("Invalid destination point.\n");
2414         return WINED3DERR_INVALIDCALL;
2415     }
2416
2417     if (!src_rect)
2418     {
2419         r.left = 0;
2420         r.top = 0;
2421         r.right = src_surface->resource.width;
2422         r.bottom = src_surface->resource.height;
2423         src_rect = &r;
2424     }
2425     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2426             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2427     {
2428         WARN("Invalid source rectangle.\n");
2429         return WINED3DERR_INVALIDCALL;
2430     }
2431
2432     src_w = src_surface->resource.width;
2433     src_h = src_surface->resource.height;
2434
2435     dst_w = dst_surface->resource.width;
2436     dst_h = dst_surface->resource.height;
2437
2438     update_w = src_rect->right - src_rect->left;
2439     update_h = src_rect->bottom - src_rect->top;
2440
2441     if (update_w > dst_w || dst_point->x > dst_w - update_w
2442             || update_h > dst_h || dst_point->y > dst_h - update_h)
2443     {
2444         WARN("Destination out of bounds.\n");
2445         return WINED3DERR_INVALIDCALL;
2446     }
2447
2448     /* NPOT block sizes would be silly. */
2449     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2450             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2451             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2452     {
2453         WARN("Update rect not block-aligned.\n");
2454         return WINED3DERR_INVALIDCALL;
2455     }
2456
2457     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2458     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2459     if (convert != WINED3D_CT_NONE || format.convert)
2460     {
2461         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2462         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2463     }
2464
2465     context = context_acquire(dst_surface->resource.device, NULL);
2466     gl_info = context->gl_info;
2467
2468     /* Only load the surface for partial updates. For newly allocated texture
2469      * the texture wouldn't be the current location, and we'd upload zeroes
2470      * just to overwrite them again. */
2471     if (update_w == dst_w && update_h == dst_h)
2472         surface_prepare_texture(dst_surface, context, FALSE);
2473     else
2474         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2475     surface_bind(dst_surface, context, FALSE);
2476
2477     data.buffer_object = src_surface->pbo;
2478     data.addr = src_surface->resource.allocatedMemory;
2479     src_pitch = wined3d_surface_get_pitch(src_surface);
2480
2481     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2482
2483     invalidate_active_texture(dst_surface->resource.device, context);
2484
2485     context_release(context);
2486
2487     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2488     return WINED3D_OK;
2489 }
2490
2491 /* This call just allocates the texture, the caller is responsible for binding
2492  * the correct texture. */
2493 /* Context activation is done by the caller. */
2494 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2495         const struct wined3d_format *format, BOOL srgb)
2496 {
2497     BOOL enable_client_storage = FALSE;
2498     GLsizei width = surface->pow2Width;
2499     GLsizei height = surface->pow2Height;
2500     const BYTE *mem = NULL;
2501     GLenum internal;
2502
2503     if (srgb)
2504     {
2505         internal = format->glGammaInternal;
2506     }
2507     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2508     {
2509         internal = format->rtInternal;
2510     }
2511     else
2512     {
2513         internal = format->glInternal;
2514     }
2515
2516     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2517     {
2518         height *= format->height_scale.numerator;
2519         height /= format->height_scale.denominator;
2520     }
2521
2522     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2523             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2524             internal, width, height, format->glFormat, format->glType);
2525
2526     ENTER_GL();
2527
2528     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2529     {
2530         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2531                 || !surface->resource.allocatedMemory)
2532         {
2533             /* In some cases we want to disable client storage.
2534              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2535              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2536              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2537              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2538              */
2539             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2540             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2541             surface->flags &= ~SFLAG_CLIENT;
2542             enable_client_storage = TRUE;
2543         }
2544         else
2545         {
2546             surface->flags |= SFLAG_CLIENT;
2547
2548             /* Point OpenGL to our allocated texture memory. Do not use
2549              * resource.allocatedMemory here because it might point into a
2550              * PBO. Instead use heapMemory, but get the alignment right. */
2551             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2552                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2553         }
2554     }
2555
2556     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2557     {
2558         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2559                 internal, width, height, 0, surface->resource.size, mem));
2560         checkGLcall("glCompressedTexImage2DARB");
2561     }
2562     else
2563     {
2564         glTexImage2D(surface->texture_target, surface->texture_level,
2565                 internal, width, height, 0, format->glFormat, format->glType, mem);
2566         checkGLcall("glTexImage2D");
2567     }
2568
2569     if(enable_client_storage) {
2570         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2571         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2572     }
2573     LEAVE_GL();
2574 }
2575
2576 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2577  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2578 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2579 /* GL locking is done by the caller */
2580 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2581 {
2582     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2583     struct wined3d_renderbuffer_entry *entry;
2584     GLuint renderbuffer = 0;
2585     unsigned int src_width, src_height;
2586     unsigned int width, height;
2587
2588     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2589     {
2590         width = rt->pow2Width;
2591         height = rt->pow2Height;
2592     }
2593     else
2594     {
2595         width = surface->pow2Width;
2596         height = surface->pow2Height;
2597     }
2598
2599     src_width = surface->pow2Width;
2600     src_height = surface->pow2Height;
2601
2602     /* A depth stencil smaller than the render target is not valid */
2603     if (width > src_width || height > src_height) return;
2604
2605     /* Remove any renderbuffer set if the sizes match */
2606     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2607             || (width == src_width && height == src_height))
2608     {
2609         surface->current_renderbuffer = NULL;
2610         return;
2611     }
2612
2613     /* Look if we've already got a renderbuffer of the correct dimensions */
2614     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2615     {
2616         if (entry->width == width && entry->height == height)
2617         {
2618             renderbuffer = entry->id;
2619             surface->current_renderbuffer = entry;
2620             break;
2621         }
2622     }
2623
2624     if (!renderbuffer)
2625     {
2626         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2627         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2628         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2629                 surface->resource.format->glInternal, width, height);
2630
2631         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2632         entry->width = width;
2633         entry->height = height;
2634         entry->id = renderbuffer;
2635         list_add_head(&surface->renderbuffers, &entry->entry);
2636
2637         surface->current_renderbuffer = entry;
2638     }
2639
2640     checkGLcall("set_compatible_renderbuffer");
2641 }
2642
2643 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2644 {
2645     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2646
2647     TRACE("surface %p.\n", surface);
2648
2649     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2650     {
2651         ERR("Surface %p is not on a swapchain.\n", surface);
2652         return GL_NONE;
2653     }
2654
2655     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2656     {
2657         if (swapchain->render_to_fbo)
2658         {
2659             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2660             return GL_COLOR_ATTACHMENT0;
2661         }
2662         TRACE("Returning GL_BACK\n");
2663         return GL_BACK;
2664     }
2665     else if (surface == swapchain->front_buffer)
2666     {
2667         TRACE("Returning GL_FRONT\n");
2668         return GL_FRONT;
2669     }
2670
2671     FIXME("Higher back buffer, returning GL_BACK\n");
2672     return GL_BACK;
2673 }
2674
2675 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2676 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2677 {
2678     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2679
2680     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2681         /* No partial locking for textures yet. */
2682         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2683
2684     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2685     if (dirty_rect)
2686     {
2687         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2688         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2689         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2690         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2691     }
2692     else
2693     {
2694         surface->dirtyRect.left = 0;
2695         surface->dirtyRect.top = 0;
2696         surface->dirtyRect.right = surface->resource.width;
2697         surface->dirtyRect.bottom = surface->resource.height;
2698     }
2699
2700     /* if the container is a texture then mark it dirty. */
2701     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2702     {
2703         TRACE("Passing to container.\n");
2704         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2705     }
2706 }
2707
2708 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2709 {
2710     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2711     BOOL ck_changed;
2712
2713     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2714
2715     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2716     {
2717         ERR("Not supported on scratch surfaces.\n");
2718         return WINED3DERR_INVALIDCALL;
2719     }
2720
2721     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2722
2723     /* Reload if either the texture and sysmem have different ideas about the
2724      * color key, or the actual key values changed. */
2725     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2726             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2727             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2728     {
2729         TRACE("Reloading because of color keying\n");
2730         /* To perform the color key conversion we need a sysmem copy of
2731          * the surface. Make sure we have it. */
2732
2733         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2734         /* Make sure the texture is reloaded because of the color key change,
2735          * this kills performance though :( */
2736         /* TODO: This is not necessarily needed with hw palettized texture support. */
2737         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2738         /* Switching color keying on / off may change the internal format. */
2739         if (ck_changed)
2740             surface_force_reload(surface);
2741     }
2742     else if (!(surface->flags & flag))
2743     {
2744         TRACE("Reloading because surface is dirty.\n");
2745     }
2746     else
2747     {
2748         TRACE("surface is already in texture\n");
2749         return WINED3D_OK;
2750     }
2751
2752     /* No partial locking for textures yet. */
2753     surface_load_location(surface, flag, NULL);
2754     surface_evict_sysmem(surface);
2755
2756     return WINED3D_OK;
2757 }
2758
/* Convert a 32 bit float to a 16 bit half float (1 sign bit, 5 exponent bits
 * with excess 15, 10 mantissa bits), rounding to nearest with ties away from
 * zero.
 *
 * Zero (of either sign) converts to 0x0000, NaN to 0x7c01, and infinities as
 * well as values too large for a half float to signed infinity. Values too
 * small for a normalized half float are converted to denormals by truncating
 * shifts, ending up as zero if they're too small for that as well.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), so that its integer part is the 11
     * bit mantissa, including the implicit leading 1. Scaling by 2 is exact.
     * At most one of the two loops runs. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    /* Rounding up may have carried out of the 11 bit mantissa, e.g. for
     * 1.9999f. Renormalize, or the value would be encoded with a zero
     * mantissa and an exponent that is too small by one. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2821
2822 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2823 {
2824     ULONG refcount;
2825
2826     TRACE("Surface %p, container %p of type %#x.\n",
2827             surface, surface->container.u.base, surface->container.type);
2828
2829     switch (surface->container.type)
2830     {
2831         case WINED3D_CONTAINER_TEXTURE:
2832             return wined3d_texture_incref(surface->container.u.texture);
2833
2834         case WINED3D_CONTAINER_SWAPCHAIN:
2835             return wined3d_swapchain_incref(surface->container.u.swapchain);
2836
2837         default:
2838             ERR("Unhandled container type %#x.\n", surface->container.type);
2839         case WINED3D_CONTAINER_NONE:
2840             break;
2841     }
2842
2843     refcount = InterlockedIncrement(&surface->resource.ref);
2844     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2845
2846     return refcount;
2847 }
2848
2849 /* Do not call while under the GL lock. */
2850 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2851 {
2852     ULONG refcount;
2853
2854     TRACE("Surface %p, container %p of type %#x.\n",
2855             surface, surface->container.u.base, surface->container.type);
2856
2857     switch (surface->container.type)
2858     {
2859         case WINED3D_CONTAINER_TEXTURE:
2860             return wined3d_texture_decref(surface->container.u.texture);
2861
2862         case WINED3D_CONTAINER_SWAPCHAIN:
2863             return wined3d_swapchain_decref(surface->container.u.swapchain);
2864
2865         default:
2866             ERR("Unhandled container type %#x.\n", surface->container.type);
2867         case WINED3D_CONTAINER_NONE:
2868             break;
2869     }
2870
2871     refcount = InterlockedDecrement(&surface->resource.ref);
2872     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2873
2874     if (!refcount)
2875     {
2876         surface_cleanup(surface);
2877         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2878
2879         TRACE("Destroyed surface %p.\n", surface);
2880         HeapFree(GetProcessHeap(), 0, surface);
2881     }
2882
2883     return refcount;
2884 }
2885
2886 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2887 {
2888     return resource_set_priority(&surface->resource, priority);
2889 }
2890
2891 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2892 {
2893     return resource_get_priority(&surface->resource);
2894 }
2895
2896 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2897 {
2898     TRACE("surface %p.\n", surface);
2899
2900     if (!surface->resource.device->d3d_initialized)
2901     {
2902         ERR("D3D not initialized.\n");
2903         return;
2904     }
2905
2906     surface_internal_preload(surface, SRGB_ANY);
2907 }
2908
2909 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2910 {
2911     TRACE("surface %p.\n", surface);
2912
2913     return surface->resource.parent;
2914 }
2915
2916 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2917 {
2918     TRACE("surface %p.\n", surface);
2919
2920     return &surface->resource;
2921 }
2922
2923 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2924 {
2925     TRACE("surface %p, flags %#x.\n", surface, flags);
2926
2927     switch (flags)
2928     {
2929         case WINEDDGBS_CANBLT:
2930         case WINEDDGBS_ISBLTDONE:
2931             return WINED3D_OK;
2932
2933         default:
2934             return WINED3DERR_INVALIDCALL;
2935     }
2936 }
2937
2938 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2939 {
2940     TRACE("surface %p, flags %#x.\n", surface, flags);
2941
2942     /* XXX: DDERR_INVALIDSURFACETYPE */
2943
2944     switch (flags)
2945     {
2946         case WINEDDGFS_CANFLIP:
2947         case WINEDDGFS_ISFLIPDONE:
2948             return WINED3D_OK;
2949
2950         default:
2951             return WINED3DERR_INVALIDCALL;
2952     }
2953 }
2954
2955 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2956 {
2957     TRACE("surface %p.\n", surface);
2958
2959     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2960     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2961 }
2962
2963 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2964 {
2965     TRACE("surface %p.\n", surface);
2966
2967     surface->flags &= ~SFLAG_LOST;
2968     return WINED3D_OK;
2969 }
2970
2971 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2972 {
2973     TRACE("surface %p, palette %p.\n", surface, palette);
2974
2975     if (surface->palette == palette)
2976     {
2977         TRACE("Nop palette change.\n");
2978         return WINED3D_OK;
2979     }
2980
2981     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2982         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2983
2984     surface->palette = palette;
2985
2986     if (palette)
2987     {
2988         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2989             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2990
2991         surface->surface_ops->surface_realize_palette(surface);
2992     }
2993
2994     return WINED3D_OK;
2995 }
2996
2997 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2998         DWORD flags, const struct wined3d_color_key *color_key)
2999 {
3000     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3001
3002     if (flags & WINEDDCKEY_COLORSPACE)
3003     {
3004         FIXME(" colorkey value not supported (%08x) !\n", flags);
3005         return WINED3DERR_INVALIDCALL;
3006     }
3007
3008     /* Dirtify the surface, but only if a key was changed. */
3009     if (color_key)
3010     {
3011         switch (flags & ~WINEDDCKEY_COLORSPACE)
3012         {
3013             case WINEDDCKEY_DESTBLT:
3014                 surface->dst_blt_color_key = *color_key;
3015                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3016                 break;
3017
3018             case WINEDDCKEY_DESTOVERLAY:
3019                 surface->dst_overlay_color_key = *color_key;
3020                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3021                 break;
3022
3023             case WINEDDCKEY_SRCOVERLAY:
3024                 surface->src_overlay_color_key = *color_key;
3025                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3026                 break;
3027
3028             case WINEDDCKEY_SRCBLT:
3029                 surface->src_blt_color_key = *color_key;
3030                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3031                 break;
3032         }
3033     }
3034     else
3035     {
3036         switch (flags & ~WINEDDCKEY_COLORSPACE)
3037         {
3038             case WINEDDCKEY_DESTBLT:
3039                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3040                 break;
3041
3042             case WINEDDCKEY_DESTOVERLAY:
3043                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3044                 break;
3045
3046             case WINEDDCKEY_SRCOVERLAY:
3047                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3048                 break;
3049
3050             case WINEDDCKEY_SRCBLT:
3051                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3052                 break;
3053         }
3054     }
3055
3056     return WINED3D_OK;
3057 }
3058
3059 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3060 {
3061     TRACE("surface %p.\n", surface);
3062
3063     return surface->palette;
3064 }
3065
3066 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3067 {
3068     const struct wined3d_format *format = surface->resource.format;
3069     DWORD pitch;
3070
3071     TRACE("surface %p.\n", surface);
3072
3073     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3074     {
3075         /* Since compressed formats are block based, pitch means the amount of
3076          * bytes to the next row of block rather than the next row of pixels. */
3077         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3078         pitch = row_block_count * format->block_byte_count;
3079     }
3080     else
3081     {
3082         unsigned char alignment = surface->resource.device->surface_alignment;
3083         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3084         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3085     }
3086
3087     TRACE("Returning %u.\n", pitch);
3088
3089     return pitch;
3090 }
3091
/* Point the surface's system memory at an application-provided buffer, or,
 * when called with a NULL pointer on a surface that currently uses a user
 * pointer, switch it back to memory prepared by wined3d.
 *
 * Returns WINED3DERR_INVALIDCALL if the surface is mapped, has its DC in use
 * or is a render target, and WINED3D_OK otherwise (including the case where
 * nothing needs to change). */
HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
{
    TRACE("surface %p, mem %p.\n", surface, mem);

    if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
    {
        WARN("Surface is locked or the DC is in use.\n");
        return WINED3DERR_INVALIDCALL;
    }

    /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
    if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
    {
        ERR("Not supported on render targets.\n");
        return WINED3DERR_INVALIDCALL;
    }

    if (mem && mem != surface->resource.allocatedMemory)
    {
        /* Heap block to free once the surface no longer references it. */
        void *release = NULL;

        /* Do I have to copy the old surface content? */
        if (surface->flags & SFLAG_DIBSECTION)
        {
            /* The surface memory lived in a DIB section; destroy it together
             * with its DC and forget every pointer into it. */
            DeleteDC(surface->hDC);
            DeleteObject(surface->dib.DIBsection);
            surface->dib.bitmap_data = NULL;
            surface->resource.allocatedMemory = NULL;
            surface->hDC = NULL;
            surface->flags &= ~SFLAG_DIBSECTION;
        }
        else if (!(surface->flags & SFLAG_USERPTR))
        {
            /* Detach the heap allocation now, but only free it after the
             * surface has been switched over to the new pointer. */
            release = surface->resource.heapMemory;
            surface->resource.heapMemory = NULL;
        }
        surface->resource.allocatedMemory = mem;
        surface->flags |= SFLAG_USERPTR;

        /* Now the surface memory is most up to date. Invalidate drawable and texture. */
        surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);

        /* For client textures OpenGL has to be notified. */
        if (surface->flags & SFLAG_CLIENT)
            surface_release_client_storage(surface);

        /* Now free the old memory if any. */
        HeapFree(GetProcessHeap(), 0, release);
    }
    else if (surface->flags & SFLAG_USERPTR)
    {
        /* Reached when mem is NULL, or when mem equals the pointer that is
         * already in use. */

        /* HeapMemory should be NULL already. */
        if (surface->resource.heapMemory)
            ERR("User pointer surface has heap memory allocated.\n");

        if (!mem)
        {
            /* Drop the user pointer and let surface_prepare_system_memory()
             * set up system memory for the surface again. */
            surface->resource.allocatedMemory = NULL;
            surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);

            if (surface->flags & SFLAG_CLIENT)
                surface_release_client_storage(surface);

            surface_prepare_system_memory(surface);
        }

        /* In both cases system memory is marked as the current location. */
        surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
    }

    return WINED3D_OK;
}
3163
3164 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3165 {
3166     LONG w, h;
3167
3168     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3169
3170     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3171     {
3172         WARN("Not an overlay surface.\n");
3173         return WINEDDERR_NOTAOVERLAYSURFACE;
3174     }
3175
3176     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3177     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3178     surface->overlay_destrect.left = x;
3179     surface->overlay_destrect.top = y;
3180     surface->overlay_destrect.right = x + w;
3181     surface->overlay_destrect.bottom = y + h;
3182
3183     surface_draw_overlay(surface);
3184
3185     return WINED3D_OK;
3186 }
3187
3188 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3189 {
3190     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3191
3192     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3193     {
3194         TRACE("Not an overlay surface.\n");
3195         return WINEDDERR_NOTAOVERLAYSURFACE;
3196     }
3197
3198     if (!surface->overlay_dest)
3199     {
3200         TRACE("Overlay not visible.\n");
3201         *x = 0;
3202         *y = 0;
3203         return WINEDDERR_OVERLAYNOTVISIBLE;
3204     }
3205
3206     *x = surface->overlay_destrect.left;
3207     *y = surface->overlay_destrect.top;
3208
3209     TRACE("Returning position %d, %d.\n", *x, *y);
3210
3211     return WINED3D_OK;
3212 }
3213
3214 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3215         DWORD flags, struct wined3d_surface *ref)
3216 {
3217     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3218
3219     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3220     {
3221         TRACE("Not an overlay surface.\n");
3222         return WINEDDERR_NOTAOVERLAYSURFACE;
3223     }
3224
3225     return WINED3D_OK;
3226 }
3227
3228 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3229         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3230 {
3231     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3232             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3233
3234     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3235     {
3236         WARN("Not an overlay surface.\n");
3237         return WINEDDERR_NOTAOVERLAYSURFACE;
3238     }
3239     else if (!dst_surface)
3240     {
3241         WARN("Dest surface is NULL.\n");
3242         return WINED3DERR_INVALIDCALL;
3243     }
3244
3245     if (src_rect)
3246     {
3247         surface->overlay_srcrect = *src_rect;
3248     }
3249     else
3250     {
3251         surface->overlay_srcrect.left = 0;
3252         surface->overlay_srcrect.top = 0;
3253         surface->overlay_srcrect.right = surface->resource.width;
3254         surface->overlay_srcrect.bottom = surface->resource.height;
3255     }
3256
3257     if (dst_rect)
3258     {
3259         surface->overlay_destrect = *dst_rect;
3260     }
3261     else
3262     {
3263         surface->overlay_destrect.left = 0;
3264         surface->overlay_destrect.top = 0;
3265         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3266         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3267     }
3268
3269     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3270     {
3271         surface->overlay_dest = NULL;
3272         list_remove(&surface->overlay_entry);
3273     }
3274
3275     if (flags & WINEDDOVER_SHOW)
3276     {
3277         if (surface->overlay_dest != dst_surface)
3278         {
3279             surface->overlay_dest = dst_surface;
3280             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3281         }
3282     }
3283     else if (flags & WINEDDOVER_HIDE)
3284     {
3285         /* tests show that the rectangles are erased on hide */
3286         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3287         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3288         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3289         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3290         surface->overlay_dest = NULL;
3291     }
3292
3293     surface_draw_overlay(surface);
3294
3295     return WINED3D_OK;
3296 }
3297
3298 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3299         UINT width, UINT height, enum wined3d_format_id format_id,
3300         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3301 {
3302     struct wined3d_device *device = surface->resource.device;
3303     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3304     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3305     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3306
3307     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3308             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3309
3310     if (!resource_size)
3311         return WINED3DERR_INVALIDCALL;
3312
3313     if (device->d3d_initialized)
3314         surface->resource.resource_ops->resource_unload(&surface->resource);
3315
3316     if (surface->flags & SFLAG_DIBSECTION)
3317     {
3318         DeleteDC(surface->hDC);
3319         DeleteObject(surface->dib.DIBsection);
3320         surface->dib.bitmap_data = NULL;
3321         surface->flags &= ~SFLAG_DIBSECTION;
3322     }
3323
3324     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3325     surface->resource.allocatedMemory = NULL;
3326     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3327     surface->resource.heapMemory = NULL;
3328
3329     surface->resource.width = width;
3330     surface->resource.height = height;
3331     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3332             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3333     {
3334         surface->pow2Width = width;
3335         surface->pow2Height = height;
3336     }
3337     else
3338     {
3339         surface->pow2Width = surface->pow2Height = 1;
3340         while (surface->pow2Width < width)
3341             surface->pow2Width <<= 1;
3342         while (surface->pow2Height < height)
3343             surface->pow2Height <<= 1;
3344     }
3345
3346     if (surface->pow2Width != width || surface->pow2Height != height)
3347         surface->flags |= SFLAG_NONPOW2;
3348     else
3349         surface->flags &= ~SFLAG_NONPOW2;
3350
3351     surface->resource.format = format;
3352     surface->resource.multisample_type = multisample_type;
3353     surface->resource.multisample_quality = multisample_quality;
3354     surface->resource.size = resource_size;
3355
3356     if (!surface_init_sysmem(surface))
3357         return E_OUTOFMEMORY;
3358
3359     return WINED3D_OK;
3360 }
3361
3362 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3363         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3364 {
3365     unsigned short *dst_s;
3366     const float *src_f;
3367     unsigned int x, y;
3368
3369     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3370
3371     for (y = 0; y < h; ++y)
3372     {
3373         src_f = (const float *)(src + y * pitch_in);
3374         dst_s = (unsigned short *) (dst + y * pitch_out);
3375         for (x = 0; x < w; ++x)
3376         {
3377             dst_s[x] = float_32_to_16(src_f + x);
3378         }
3379     }
3380 }
3381
3382 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3383         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3384 {
3385     static const unsigned char convert_5to8[] =
3386     {
3387         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3388         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3389         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3390         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3391     };
3392     static const unsigned char convert_6to8[] =
3393     {
3394         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3395         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3396         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3397         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3398         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3399         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3400         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3401         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3402     };
3403     unsigned int x, y;
3404
3405     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3406
3407     for (y = 0; y < h; ++y)
3408     {
3409         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3410         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3411         for (x = 0; x < w; ++x)
3412         {
3413             WORD pixel = src_line[x];
3414             dst_line[x] = 0xff000000
3415                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3416                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3417                     | convert_5to8[(pixel & 0x001f)];
3418         }
3419     }
3420 }
3421
3422 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3423  * in both cases we're just setting the X / Alpha channel to 0xff. */
3424 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3425         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3426 {
3427     unsigned int x, y;
3428
3429     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3430
3431     for (y = 0; y < h; ++y)
3432     {
3433         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3434         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3435
3436         for (x = 0; x < w; ++x)
3437         {
3438             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3439         }
3440     }
3441 }
3442
3443 static inline BYTE cliptobyte(int x)
3444 {
3445     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3446 }
3447
3448 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3449         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3450 {
3451     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3452     unsigned int x, y;
3453
3454     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3455
3456     for (y = 0; y < h; ++y)
3457     {
3458         const BYTE *src_line = src + y * pitch_in;
3459         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3460         for (x = 0; x < w; ++x)
3461         {
3462             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3463              *     C = Y - 16; D = U - 128; E = V - 128;
3464              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3465              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3466              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3467              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3468              * U and V are shared between the pixels. */
3469             if (!(x & 1)) /* For every even pixel, read new U and V. */
3470             {
3471                 d = (int) src_line[1] - 128;
3472                 e = (int) src_line[3] - 128;
3473                 r2 = 409 * e + 128;
3474                 g2 = - 100 * d - 208 * e + 128;
3475                 b2 = 516 * d + 128;
3476             }
3477             c2 = 298 * ((int) src_line[0] - 16);
3478             dst_line[x] = 0xff000000
3479                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3480                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3481                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3482                 /* Scale RGB values to 0..255 range,
3483                  * then clip them if still not in range (may be negative),
3484                  * then shift them within DWORD if necessary. */
3485             src_line += 2;
3486         }
3487     }
3488 }
3489
3490 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3491         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3492 {
3493     unsigned int x, y;
3494     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3495
3496     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3497
3498     for (y = 0; y < h; ++y)
3499     {
3500         const BYTE *src_line = src + y * pitch_in;
3501         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3502         for (x = 0; x < w; ++x)
3503         {
3504             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3505              *     C = Y - 16; D = U - 128; E = V - 128;
3506              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3507              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3508              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3509              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3510              * U and V are shared between the pixels. */
3511             if (!(x & 1)) /* For every even pixel, read new U and V. */
3512             {
3513                 d = (int) src_line[1] - 128;
3514                 e = (int) src_line[3] - 128;
3515                 r2 = 409 * e + 128;
3516                 g2 = - 100 * d - 208 * e + 128;
3517                 b2 = 516 * d + 128;
3518             }
3519             c2 = 298 * ((int) src_line[0] - 16);
3520             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3521                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3522                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3523                 /* Scale RGB values to 0..255 range,
3524                  * then clip them if still not in range (may be negative),
3525                  * then shift them within DWORD if necessary. */
3526             src_line += 2;
3527         }
3528     }
3529 }
3530
/* Describes one CPU format conversion: the source and destination format
 * ids, and the routine that converts a w x h pixel area from src to dst,
 * where pitch_in and pitch_out are the row strides of the two buffers in
 * bytes. */
struct d3dfmt_convertor_desc
{
    enum wined3d_format_id from, to;
    void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
};
3536
/* Table of available conversions, searched by find_convertor() for an exact
 * (from, to) match. The B8G8R8A8 <-> B8G8R8X8 entries share one routine. */
static const struct d3dfmt_convertor_desc convertors[] =
{
    {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
    {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
    {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
};
3546
3547 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3548         enum wined3d_format_id to)
3549 {
3550     unsigned int i;
3551
3552     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3553     {
3554         if (convertors[i].from == from && convertors[i].to == to)
3555             return &convertors[i];
3556     }
3557
3558     return NULL;
3559 }
3560
3561 /*****************************************************************************
3562  * surface_convert_format
3563  *
3564  * Creates a duplicate of a surface in a different format. Is used by Blt to
3565  * blit between surfaces with different formats.
3566  *
3567  * Parameters
3568  *  source: Source surface
3569  *  fmt: Requested destination format
3570  *
3571  *****************************************************************************/
3572 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3573 {
3574     struct wined3d_mapped_rect src_map, dst_map;
3575     const struct d3dfmt_convertor_desc *conv;
3576     struct wined3d_surface *ret = NULL;
3577     HRESULT hr;
3578
3579     conv = find_convertor(source->resource.format->id, to_fmt);
3580     if (!conv)
3581     {
3582         FIXME("Cannot find a conversion function from format %s to %s.\n",
3583                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3584         return NULL;
3585     }
3586
3587     wined3d_surface_create(source->resource.device, source->resource.width,
3588             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3D_POOL_SCRATCH,
3589             WINED3D_MULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3590             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3591             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3592     if (!ret)
3593     {
3594         ERR("Failed to create a destination surface for conversion.\n");
3595         return NULL;
3596     }
3597
3598     memset(&src_map, 0, sizeof(src_map));
3599     memset(&dst_map, 0, sizeof(dst_map));
3600
3601     hr = wined3d_surface_map(source, &src_map, NULL, WINED3DLOCK_READONLY);
3602     if (FAILED(hr))
3603     {
3604         ERR("Failed to lock the source surface.\n");
3605         wined3d_surface_decref(ret);
3606         return NULL;
3607     }
3608     hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3DLOCK_READONLY);
3609     if (FAILED(hr))
3610     {
3611         ERR("Failed to lock the destination surface.\n");
3612         wined3d_surface_unmap(source);
3613         wined3d_surface_decref(ret);
3614         return NULL;
3615     }
3616
3617     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3618             source->resource.width, source->resource.height);
3619
3620     wined3d_surface_unmap(ret);
3621     wined3d_surface_unmap(source);
3622
3623     return ret;
3624 }
3625
3626 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3627         unsigned int bpp, UINT pitch, DWORD color)
3628 {
3629     BYTE *first;
3630     int x, y;
3631
3632     /* Do first row */
3633
3634 #define COLORFILL_ROW(type) \
3635 do { \
3636     type *d = (type *)buf; \
3637     for (x = 0; x < width; ++x) \
3638         d[x] = (type)color; \
3639 } while(0)
3640
3641     switch (bpp)
3642     {
3643         case 1:
3644             COLORFILL_ROW(BYTE);
3645             break;
3646
3647         case 2:
3648             COLORFILL_ROW(WORD);
3649             break;
3650
3651         case 3:
3652         {
3653             BYTE *d = buf;
3654             for (x = 0; x < width; ++x, d += 3)
3655             {
3656                 d[0] = (color      ) & 0xFF;
3657                 d[1] = (color >>  8) & 0xFF;
3658                 d[2] = (color >> 16) & 0xFF;
3659             }
3660             break;
3661         }
3662         case 4:
3663             COLORFILL_ROW(DWORD);
3664             break;
3665
3666         default:
3667             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3668             return WINED3DERR_NOTAVAILABLE;
3669     }
3670
3671 #undef COLORFILL_ROW
3672
3673     /* Now copy first row. */
3674     first = buf;
3675     for (y = 1; y < height; ++y)
3676     {
3677         buf += pitch;
3678         memcpy(buf, first, width * bpp);
3679     }
3680
3681     return WINED3D_OK;
3682 }
3683
3684 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3685 {
3686     TRACE("surface %p.\n", surface);
3687
3688     if (!(surface->flags & SFLAG_LOCKED))
3689     {
3690         WARN("Trying to unmap unmapped surface.\n");
3691         return WINEDDERR_NOTLOCKED;
3692     }
3693     surface->flags &= ~SFLAG_LOCKED;
3694
3695     surface->surface_ops->surface_unmap(surface);
3696
3697     return WINED3D_OK;
3698 }
3699
/* Map a surface (or a sub-rectangle of it) for CPU access.
 *
 * On success mapped_rect receives the row pitch and a pointer to the first
 * byte of the requested area inside the surface's system memory, and the
 * area is recorded in surface->lockedRect. A NULL rect maps the whole
 * surface.
 *
 * Returns WINED3DERR_INVALIDCALL if the surface is already mapped, or if
 * rect is not aligned to the format's block size on a block-based format in
 * the default pool; WINED3D_OK otherwise. */
HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
        struct wined3d_mapped_rect *mapped_rect, const RECT *rect, DWORD flags)
{
    const struct wined3d_format *format = surface->resource.format;

    TRACE("surface %p, mapped_rect %p, rect %s, flags %#x.\n",
            surface, mapped_rect, wine_dbgstr_rect(rect), flags);

    if (surface->flags & SFLAG_LOCKED)
    {
        WARN("Surface is already mapped.\n");
        return WINED3DERR_INVALIDCALL;
    }
    /* For block-based formats a partial rect has to start and end on block
     * boundaries. A rect covering the whole surface is exempt. */
    if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
            && rect && (rect->left || rect->top
            || rect->right != surface->resource.width
            || rect->bottom != surface->resource.height))
    {
        /* NOTE(review): the masks assume block dimensions are powers of two. */
        UINT width_mask = format->block_width - 1;
        UINT height_mask = format->block_height - 1;

        if ((rect->left & width_mask) || (rect->right & width_mask)
                || (rect->top & height_mask) || (rect->bottom & height_mask))
        {
            WARN("Map rect %s is misaligned for %ux%u blocks.\n",
                    wine_dbgstr_rect(rect), format->block_width, format->block_height);

            /* Only rejected in the default pool; other pools just get the
             * warning above. */
            if (surface->resource.pool == WINED3D_POOL_DEFAULT)
                return WINED3DERR_INVALIDCALL;
        }
    }

    surface->flags |= SFLAG_LOCKED;

    if (!(surface->flags & SFLAG_LOCKABLE))
        WARN("Trying to lock unlockable surface.\n");

    /* Performance optimization: Count how often a surface is mapped, if it is
     * mapped regularly do not throw away the system memory copy. This avoids
     * the need to download the surface from OpenGL all the time. The surface
     * is still downloaded if the OpenGL texture is changed. */
    if (!(surface->flags & SFLAG_DYNLOCK))
    {
        if (++surface->lockCount > MAXLOCKCOUNT)
        {
            TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
            surface->flags |= SFLAG_DYNLOCK;
        }
    }

    /* Surface-type specific part of the map; allocatedMemory is read only
     * after this call. */
    surface->surface_ops->surface_map(surface, rect, flags);

    if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
        mapped_rect->row_pitch = surface->resource.width * format->byte_count;
    else
        mapped_rect->row_pitch = wined3d_surface_get_pitch(surface);

    if (!rect)
    {
        mapped_rect->data = surface->resource.allocatedMemory;
        surface->lockedRect.left = 0;
        surface->lockedRect.top = 0;
        surface->lockedRect.right = surface->resource.width;
        surface->lockedRect.bottom = surface->resource.height;
    }
    else
    {
        /* Block addressing is used only when the format has BLOCKS set and
         * BROKEN_PITCH clear. */
        if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
        {
            /* Compressed textures are block based, so calculate the offset of
             * the block that contains the top-left pixel of the locked rectangle. */
            mapped_rect->data = surface->resource.allocatedMemory
                    + ((rect->top / format->block_height) * mapped_rect->row_pitch)
                    + ((rect->left / format->block_width) * format->block_byte_count);
        }
        else
        {
            mapped_rect->data = surface->resource.allocatedMemory
                    + (mapped_rect->row_pitch * rect->top)
                    + (rect->left * format->byte_count);
        }
        surface->lockedRect.left = rect->left;
        surface->lockedRect.top = rect->top;
        surface->lockedRect.right = rect->right;
        surface->lockedRect.bottom = rect->bottom;
    }

    TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
    TRACE("Returning memory %p, pitch %u.\n", mapped_rect->data, mapped_rect->row_pitch);

    return WINED3D_OK;
}
3792
/* Hand out a GDI device context for the surface in *dc.
 *
 * A DIB section is created the first time a DC is requested. The surface is
 * mapped for as long as the DC is in use; wined3d_surface_releasedc() undoes
 * this.
 *
 * Returns WINEDDERR_NODC for surfaces using application-provided memory,
 * WINEDDERR_DCALREADYCREATED if a DC is already in use,
 * WINED3DERR_INVALIDCALL if the surface is mapped or the DIB section could
 * not be created, the error from wined3d_surface_map() if mapping fails, and
 * WINED3D_OK otherwise. */
HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
{
    struct wined3d_mapped_rect map;
    HRESULT hr;

    TRACE("surface %p, dc %p.\n", surface, dc);

    if (surface->flags & SFLAG_USERPTR)
    {
        ERR("Not supported on surfaces with application-provided memory.\n");
        return WINEDDERR_NODC;
    }

    /* Give more detailed info for ddraw. */
    if (surface->flags & SFLAG_DCINUSE)
        return WINEDDERR_DCALREADYCREATED;

    /* Can't GetDC if the surface is locked. */
    if (surface->flags & SFLAG_LOCKED)
        return WINED3DERR_INVALIDCALL;

    /* Create a DIB section if there isn't a dc yet. */
    if (!surface->hDC)
    {
        if (surface->flags & SFLAG_CLIENT)
        {
            /* Bring system memory up to date before releasing the client
             * storage. */
            surface_load_location(surface, SFLAG_INSYSMEM, NULL);
            surface_release_client_storage(surface);
        }
        hr = surface_create_dib_section(surface);
        if (FAILED(hr))
            return WINED3DERR_INVALIDCALL;

        /* Use the DIB section from now on if we are not using a PBO. */
        if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
        {
            HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
            surface->resource.heapMemory = NULL;
            surface->resource.allocatedMemory = surface->dib.bitmap_data;
        }
    }

    /* Map the surface. */
    hr = wined3d_surface_map(surface, &map, NULL, 0);
    if (FAILED(hr))
    {
        ERR("Map failed, hr %#x.\n", hr);
        return hr;
    }

    /* Sync the DIB with the PBO. This can't be done earlier because Map()
     * activates the allocatedMemory. */
    if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
        memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);

    if (surface->resource.format->id == WINED3DFMT_P8_UINT
            || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
    {
        /* GetDC on palettized formats is unsupported in D3D9, and the method
         * is missing in D3D8, so this should only be used for DX <=7
         * surfaces (with non-device palettes). */
        const PALETTEENTRY *pal = NULL;

        if (surface->palette)
        {
            pal = surface->palette->palents;
        }
        else
        {
            /* No palette of its own: fall back to the palette of the first
             * swapchain's front buffer, if it has one. */
            struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
            struct wined3d_surface *dds_primary = swapchain->front_buffer;

            if (dds_primary && dds_primary->palette)
                pal = dds_primary->palette->palents;
        }

        if (pal)
        {
            /* Copy the 256 palette entries into the DC's colour table. */
            RGBQUAD col[256];
            unsigned int i;

            for (i = 0; i < 256; ++i)
            {
                col[i].rgbRed = pal[i].peRed;
                col[i].rgbGreen = pal[i].peGreen;
                col[i].rgbBlue = pal[i].peBlue;
                col[i].rgbReserved = 0;
            }
            SetDIBColorTable(surface->hDC, 0, 256, col);
        }
    }

    surface->flags |= SFLAG_DCINUSE;

    *dc = surface->hDC;
    TRACE("Returning dc %p.\n", *dc);

    return WINED3D_OK;
}
3892
3893 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3894 {
3895     TRACE("surface %p, dc %p.\n", surface, dc);
3896
3897     if (!(surface->flags & SFLAG_DCINUSE))
3898         return WINEDDERR_NODC;
3899
3900     if (surface->hDC != dc)
3901     {
3902         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3903                 dc, surface->hDC);
3904         return WINEDDERR_NODC;
3905     }
3906
3907     /* Copy the contents of the DIB over to the PBO. */
3908     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
3909         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3910
3911     /* We locked first, so unlock now. */
3912     wined3d_surface_unmap(surface);
3913
3914     surface->flags &= ~SFLAG_DCINUSE;
3915
3916     return WINED3D_OK;
3917 }
3918
3919 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3920 {
3921     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3922
3923     if (flags)
3924     {
3925         static UINT once;
3926         if (!once++)
3927             FIXME("Ignoring flags %#x.\n", flags);
3928         else
3929             WARN("Ignoring flags %#x.\n", flags);
3930     }
3931
3932     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3933     {
3934         ERR("Not supported on swapchain surfaces.\n");
3935         return WINEDDERR_NOTFLIPPABLE;
3936     }
3937
3938     /* Flipping is only supported on render targets and overlays. */
3939     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3940     {
3941         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3942         return WINEDDERR_NOTFLIPPABLE;
3943     }
3944
3945     flip_surface(surface, override);
3946
3947     /* Update overlays if they're visible. */
3948     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3949         return surface_draw_overlay(surface);
3950
3951     return WINED3D_OK;
3952 }
3953
3954 /* Do not call while under the GL lock. */
3955 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3956 {
3957     struct wined3d_device *device = surface->resource.device;
3958
3959     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3960
3961     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3962     {
3963         struct wined3d_texture *texture = surface->container.u.texture;
3964
3965         TRACE("Passing to container (%p).\n", texture);
3966         texture->texture_ops->texture_preload(texture, srgb);
3967     }
3968     else
3969     {
3970         struct wined3d_context *context;
3971
3972         TRACE("(%p) : About to load surface\n", surface);
3973
3974         /* TODO: Use already acquired context when possible. */
3975         context = context_acquire(device, NULL);
3976
3977         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3978
3979         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3980         {
3981             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3982             GLclampf tmp;
3983             tmp = 0.9f;
3984             ENTER_GL();
3985             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3986             LEAVE_GL();
3987         }
3988
3989         context_release(context);
3990     }
3991 }
3992
3993 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3994 {
3995     if (!surface->resource.allocatedMemory)
3996     {
3997         if (!surface->resource.heapMemory)
3998         {
3999             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
4000                     surface->resource.size + RESOURCE_ALIGNMENT)))
4001             {
4002                 ERR("Failed to allocate memory.\n");
4003                 return FALSE;
4004             }
4005         }
4006         else if (!(surface->flags & SFLAG_CLIENT))
4007         {
4008             ERR("Surface %p has heapMemory %p and flags %#x.\n",
4009                     surface, surface->resource.heapMemory, surface->flags);
4010         }
4011
4012         surface->resource.allocatedMemory =
4013             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
4014     }
4015     else
4016     {
4017         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
4018     }
4019
4020     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
4021
4022     return TRUE;
4023 }
4024
4025 /* Read the framebuffer back into the surface */
4026 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4027 {
4028     struct wined3d_device *device = surface->resource.device;
4029     const struct wined3d_gl_info *gl_info;
4030     struct wined3d_context *context;
4031     BYTE *mem;
4032     GLint fmt;
4033     GLint type;
4034     BYTE *row, *top, *bottom;
4035     int i;
4036     BOOL bpp;
4037     RECT local_rect;
4038     BOOL srcIsUpsideDown;
4039     GLint rowLen = 0;
4040     GLint skipPix = 0;
4041     GLint skipRow = 0;
4042
4043     context = context_acquire(device, surface);
4044     context_apply_blit_state(context, device);
4045     gl_info = context->gl_info;
4046
4047     ENTER_GL();
4048
4049     /* Select the correct read buffer, and give some debug output.
4050      * There is no need to keep track of the current read buffer or reset it, every part of the code
4051      * that reads sets the read buffer as desired.
4052      */
4053     if (surface_is_offscreen(surface))
4054     {
4055         /* Mapping the primary render target which is not on a swapchain.
4056          * Read from the back buffer. */
4057         TRACE("Mapping offscreen render target.\n");
4058         glReadBuffer(device->offscreenBuffer);
4059         srcIsUpsideDown = TRUE;
4060     }
4061     else
4062     {
4063         /* Onscreen surfaces are always part of a swapchain */
4064         GLenum buffer = surface_get_gl_buffer(surface);
4065         TRACE("Mapping %#x buffer.\n", buffer);
4066         glReadBuffer(buffer);
4067         checkGLcall("glReadBuffer");
4068         srcIsUpsideDown = FALSE;
4069     }
4070
4071     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4072     if (!rect)
4073     {
4074         local_rect.left = 0;
4075         local_rect.top = 0;
4076         local_rect.right = surface->resource.width;
4077         local_rect.bottom = surface->resource.height;
4078     }
4079     else
4080     {
4081         local_rect = *rect;
4082     }
4083     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4084
4085     switch (surface->resource.format->id)
4086     {
4087         case WINED3DFMT_P8_UINT:
4088         {
4089             if (primary_render_target_is_p8(device))
4090             {
4091                 /* In case of P8 render targets the index is stored in the alpha component */
4092                 fmt = GL_ALPHA;
4093                 type = GL_UNSIGNED_BYTE;
4094                 mem = dest;
4095                 bpp = surface->resource.format->byte_count;
4096             }
4097             else
4098             {
4099                 /* GL can't return palettized data, so read ARGB pixels into a
4100                  * separate block of memory and convert them into palettized format
4101                  * in software. Slow, but if the app means to use palettized render
4102                  * targets and locks it...
4103                  *
4104                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4105                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4106                  * for the color channels when palettizing the colors.
4107                  */
4108                 fmt = GL_RGB;
4109                 type = GL_UNSIGNED_BYTE;
4110                 pitch *= 3;
4111                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4112                 if (!mem)
4113                 {
4114                     ERR("Out of memory\n");
4115                     LEAVE_GL();
4116                     return;
4117                 }
4118                 bpp = surface->resource.format->byte_count * 3;
4119             }
4120         }
4121         break;
4122
4123         default:
4124             mem = dest;
4125             fmt = surface->resource.format->glFormat;
4126             type = surface->resource.format->glType;
4127             bpp = surface->resource.format->byte_count;
4128     }
4129
4130     if (surface->flags & SFLAG_PBO)
4131     {
4132         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4133         checkGLcall("glBindBufferARB");
4134         if (mem)
4135         {
4136             ERR("mem not null for pbo -- unexpected\n");
4137             mem = NULL;
4138         }
4139     }
4140
4141     /* Save old pixel store pack state */
4142     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4143     checkGLcall("glGetIntegerv");
4144     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4145     checkGLcall("glGetIntegerv");
4146     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4147     checkGLcall("glGetIntegerv");
4148
4149     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4150     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4151     checkGLcall("glPixelStorei");
4152     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4153     checkGLcall("glPixelStorei");
4154     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4155     checkGLcall("glPixelStorei");
4156
4157     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4158             local_rect.right - local_rect.left,
4159             local_rect.bottom - local_rect.top,
4160             fmt, type, mem);
4161     checkGLcall("glReadPixels");
4162
4163     /* Reset previous pixel store pack state */
4164     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4165     checkGLcall("glPixelStorei");
4166     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4167     checkGLcall("glPixelStorei");
4168     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4169     checkGLcall("glPixelStorei");
4170
4171     if (surface->flags & SFLAG_PBO)
4172     {
4173         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4174         checkGLcall("glBindBufferARB");
4175
4176         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4177          * to get a pointer to it and perform the flipping in software. This is a lot
4178          * faster than calling glReadPixels for each line. In case we want more speed
4179          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4180         if (!srcIsUpsideDown)
4181         {
4182             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4183             checkGLcall("glBindBufferARB");
4184
4185             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4186             checkGLcall("glMapBufferARB");
4187         }
4188     }
4189
4190     /* TODO: Merge this with the palettization loop below for P8 targets */
4191     if(!srcIsUpsideDown) {
4192         UINT len, off;
4193         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4194             Flip the lines in software */
4195         len = (local_rect.right - local_rect.left) * bpp;
4196         off = local_rect.left * bpp;
4197
4198         row = HeapAlloc(GetProcessHeap(), 0, len);
4199         if(!row) {
4200             ERR("Out of memory\n");
4201             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4202                 HeapFree(GetProcessHeap(), 0, mem);
4203             LEAVE_GL();
4204             return;
4205         }
4206
4207         top = mem + pitch * local_rect.top;
4208         bottom = mem + pitch * (local_rect.bottom - 1);
4209         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4210             memcpy(row, top + off, len);
4211             memcpy(top + off, bottom + off, len);
4212             memcpy(bottom + off, row, len);
4213             top += pitch;
4214             bottom -= pitch;
4215         }
4216         HeapFree(GetProcessHeap(), 0, row);
4217
4218         /* Unmap the temp PBO buffer */
4219         if (surface->flags & SFLAG_PBO)
4220         {
4221             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4222             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4223         }
4224     }
4225
4226     LEAVE_GL();
4227     context_release(context);
4228
4229     /* For P8 textures we need to perform an inverse palette lookup. This is
4230      * done by searching for a palette index which matches the RGB value.
4231      * Note this isn't guaranteed to work when there are multiple entries for
4232      * the same color but we have no choice. In case of P8 render targets,
4233      * the index is stored in the alpha component so no conversion is needed. */
4234     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4235     {
4236         const PALETTEENTRY *pal = NULL;
4237         DWORD width = pitch / 3;
4238         int x, y, c;
4239
4240         if (surface->palette)
4241         {
4242             pal = surface->palette->palents;
4243         }
4244         else
4245         {
4246             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4247             HeapFree(GetProcessHeap(), 0, mem);
4248             return;
4249         }
4250
4251         for(y = local_rect.top; y < local_rect.bottom; y++) {
4252             for(x = local_rect.left; x < local_rect.right; x++) {
4253                 /*                      start              lines            pixels      */
4254                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4255                 const BYTE *green = blue  + 1;
4256                 const BYTE *red = green + 1;
4257
4258                 for(c = 0; c < 256; c++) {
4259                     if(*red   == pal[c].peRed   &&
4260                        *green == pal[c].peGreen &&
4261                        *blue  == pal[c].peBlue)
4262                     {
4263                         *((BYTE *) dest + y * width + x) = c;
4264                         break;
4265                     }
4266                 }
4267             }
4268         }
4269         HeapFree(GetProcessHeap(), 0, mem);
4270     }
4271 }
4272
4273 /* Read the framebuffer contents into a texture. Note that this function
4274  * doesn't do any kind of flipping. Using this on an onscreen surface will
4275  * result in a flipped D3D texture. */
4276 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4277 {
4278     struct wined3d_device *device = surface->resource.device;
4279     struct wined3d_context *context;
4280
4281     context = context_acquire(device, surface);
4282     device_invalidate_state(device, STATE_FRAMEBUFFER);
4283
4284     surface_prepare_texture(surface, context, srgb);
4285     surface_bind_and_dirtify(surface, context, srgb);
4286
4287     TRACE("Reading back offscreen render target %p.\n", surface);
4288
4289     ENTER_GL();
4290
4291     if (surface_is_offscreen(surface))
4292         glReadBuffer(device->offscreenBuffer);
4293     else
4294         glReadBuffer(surface_get_gl_buffer(surface));
4295     checkGLcall("glReadBuffer");
4296
4297     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4298             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4299     checkGLcall("glCopyTexSubImage2D");
4300
4301     LEAVE_GL();
4302
4303     context_release(context);
4304 }
4305
4306 /* Context activation is done by the caller. */
4307 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4308         struct wined3d_context *context, BOOL srgb)
4309 {
4310     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4311     enum wined3d_conversion_type convert;
4312     struct wined3d_format format;
4313
4314     if (surface->flags & alloc_flag) return;
4315
4316     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4317     if (convert != WINED3D_CT_NONE || format.convert)
4318         surface->flags |= SFLAG_CONVERTED;
4319     else surface->flags &= ~SFLAG_CONVERTED;
4320
4321     surface_bind_and_dirtify(surface, context, srgb);
4322     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4323     surface->flags |= alloc_flag;
4324 }
4325
4326 /* Context activation is done by the caller. */
4327 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4328 {
4329     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4330     {
4331         struct wined3d_texture *texture = surface->container.u.texture;
4332         UINT sub_count = texture->level_count * texture->layer_count;
4333         UINT i;
4334
4335         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4336
4337         for (i = 0; i < sub_count; ++i)
4338         {
4339             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4340             surface_prepare_texture_internal(s, context, srgb);
4341         }
4342
4343         return;
4344     }
4345
4346     surface_prepare_texture_internal(surface, context, srgb);
4347 }
4348
4349 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4350 {
4351     if (multisample)
4352     {
4353         if (surface->rb_multisample)
4354             return;
4355
4356         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4357         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4358         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4359                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4360         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4361     }
4362     else
4363     {
4364         if (surface->rb_resolved)
4365             return;
4366
4367         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4368         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4369         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4370                 surface->pow2Width, surface->pow2Height);
4371         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4372     }
4373 }
4374
4375 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4376         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4377 {
4378     struct wined3d_device *device = surface->resource.device;
4379     UINT pitch = wined3d_surface_get_pitch(surface);
4380     const struct wined3d_gl_info *gl_info;
4381     struct wined3d_context *context;
4382     RECT local_rect;
4383     UINT w, h;
4384
4385     surface_get_rect(surface, rect, &local_rect);
4386
4387     mem += local_rect.top * pitch + local_rect.left * bpp;
4388     w = local_rect.right - local_rect.left;
4389     h = local_rect.bottom - local_rect.top;
4390
4391     /* Activate the correct context for the render target */
4392     context = context_acquire(device, surface);
4393     context_apply_blit_state(context, device);
4394     gl_info = context->gl_info;
4395
4396     ENTER_GL();
4397
4398     if (!surface_is_offscreen(surface))
4399     {
4400         GLenum buffer = surface_get_gl_buffer(surface);
4401         TRACE("Unlocking %#x buffer.\n", buffer);
4402         context_set_draw_buffer(context, buffer);
4403
4404         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4405         glPixelZoom(1.0f, -1.0f);
4406     }
4407     else
4408     {
4409         /* Primary offscreen render target */
4410         TRACE("Offscreen render target.\n");
4411         context_set_draw_buffer(context, device->offscreenBuffer);
4412
4413         glPixelZoom(1.0f, 1.0f);
4414     }
4415
4416     glRasterPos3i(local_rect.left, local_rect.top, 1);
4417     checkGLcall("glRasterPos3i");
4418
4419     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4420     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4421
4422     if (surface->flags & SFLAG_PBO)
4423     {
4424         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4425         checkGLcall("glBindBufferARB");
4426     }
4427
4428     glDrawPixels(w, h, fmt, type, mem);
4429     checkGLcall("glDrawPixels");
4430
4431     if (surface->flags & SFLAG_PBO)
4432     {
4433         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4434         checkGLcall("glBindBufferARB");
4435     }
4436
4437     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4438     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4439
4440     LEAVE_GL();
4441
4442     if (wined3d_settings.strict_draw_ordering
4443             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4444             && surface->container.u.swapchain->front_buffer == surface))
4445         wglFlush();
4446
4447     context_release(context);
4448 }
4449
4450 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck, BOOL use_texturing,
4451         struct wined3d_format *format, enum wined3d_conversion_type *conversion_type)
4452 {
4453     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4454     const struct wined3d_device *device = surface->resource.device;
4455     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4456     BOOL blit_supported = FALSE;
4457
4458     /* Copy the default values from the surface. Below we might perform fixups */
4459     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4460     *format = *surface->resource.format;
4461     *conversion_type = WINED3D_CT_NONE;
4462
4463     /* Ok, now look if we have to do any conversion */
4464     switch (surface->resource.format->id)
4465     {
4466         case WINED3DFMT_P8_UINT:
4467             /* Below the call to blit_supported is disabled for Wine 1.2
4468              * because the function isn't operating correctly yet. At the
4469              * moment 8-bit blits are handled in software and if certain GL
4470              * extensions are around, surface conversion is performed at
4471              * upload time. The blit_supported call recognizes it as a
4472              * destination fixup. This type of upload 'fixup' and 8-bit to
4473              * 8-bit blits need to be handled by the blit_shader.
4474              * TODO: get rid of this #if 0. */
4475 #if 0
4476             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4477                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4478                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4479 #endif
4480             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4481
4482             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4483              * texturing. Further also use conversion in case of color keying.
4484              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4485              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4486              * conflicts with this.
4487              */
4488             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4489                     || colorkey_active || !use_texturing)
4490             {
4491                 format->glFormat = GL_RGBA;
4492                 format->glInternal = GL_RGBA;
4493                 format->glType = GL_UNSIGNED_BYTE;
4494                 format->conv_byte_count = 4;
4495                 if (colorkey_active)
4496                     *conversion_type = WINED3D_CT_PALETTED_CK;
4497                 else
4498                     *conversion_type = WINED3D_CT_PALETTED;
4499             }
4500             break;
4501
4502         case WINED3DFMT_B2G3R3_UNORM:
4503             /* **********************
4504                 GL_UNSIGNED_BYTE_3_3_2
4505                 ********************** */
4506             if (colorkey_active) {
4507                 /* This texture format will never be used.. So do not care about color keying
4508                     up until the point in time it will be needed :-) */
4509                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4510             }
4511             break;
4512
4513         case WINED3DFMT_B5G6R5_UNORM:
4514             if (colorkey_active)
4515             {
4516                 *conversion_type = WINED3D_CT_CK_565;
4517                 format->glFormat = GL_RGBA;
4518                 format->glInternal = GL_RGB5_A1;
4519                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4520                 format->conv_byte_count = 2;
4521             }
4522             break;
4523
4524         case WINED3DFMT_B5G5R5X1_UNORM:
4525             if (colorkey_active)
4526             {
4527                 *conversion_type = WINED3D_CT_CK_5551;
4528                 format->glFormat = GL_BGRA;
4529                 format->glInternal = GL_RGB5_A1;
4530                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4531                 format->conv_byte_count = 2;
4532             }
4533             break;
4534
4535         case WINED3DFMT_B8G8R8_UNORM:
4536             if (colorkey_active)
4537             {
4538                 *conversion_type = WINED3D_CT_CK_RGB24;
4539                 format->glFormat = GL_RGBA;
4540                 format->glInternal = GL_RGBA8;
4541                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4542                 format->conv_byte_count = 4;
4543             }
4544             break;
4545
4546         case WINED3DFMT_B8G8R8X8_UNORM:
4547             if (colorkey_active)
4548             {
4549                 *conversion_type = WINED3D_CT_RGB32_888;
4550                 format->glFormat = GL_RGBA;
4551                 format->glInternal = GL_RGBA8;
4552                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4553                 format->conv_byte_count = 4;
4554             }
4555             break;
4556
4557         case WINED3DFMT_B8G8R8A8_UNORM:
4558             if (colorkey_active)
4559             {
4560                 *conversion_type = WINED3D_CT_CK_ARGB32;
4561                 format->conv_byte_count = 4;
4562             }
4563             break;
4564
4565         default:
4566             break;
4567     }
4568
4569     if (*conversion_type != WINED3D_CT_NONE)
4570     {
4571         format->rtInternal = format->glInternal;
4572         format->glGammaInternal = format->glInternal;
4573     }
4574
4575     return WINED3D_OK;
4576 }
4577
4578 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4579 {
4580     /* FIXME: Is this really how color keys are supposed to work? I think it
4581      * makes more sense to compare the individual channels. */
4582     return color >= color_key->color_space_low_value
4583             && color <= color_key->color_space_high_value;
4584 }
4585
4586 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4587 {
4588     const struct wined3d_device *device = surface->resource.device;
4589     const struct wined3d_palette *pal = surface->palette;
4590     BOOL index_in_alpha = FALSE;
4591     unsigned int i;
4592
4593     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4594      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4595      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4596      * duplicate entries. Store the color key in the unused alpha component to speed the
4597      * download up and to make conversion unneeded. */
4598     index_in_alpha = primary_render_target_is_p8(device);
4599
4600     if (!pal)
4601     {
4602         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4603         if (index_in_alpha)
4604         {
4605             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4606              * there's no palette at this time. */
4607             for (i = 0; i < 256; i++) table[i][3] = i;
4608         }
4609     }
4610     else
4611     {
4612         TRACE("Using surface palette %p\n", pal);
4613         /* Get the surface's palette */
4614         for (i = 0; i < 256; ++i)
4615         {
4616             table[i][0] = pal->palents[i].peRed;
4617             table[i][1] = pal->palents[i].peGreen;
4618             table[i][2] = pal->palents[i].peBlue;
4619
4620             /* When index_in_alpha is set the palette index is stored in the
4621              * alpha component. In case of a readback we can then read
4622              * GL_ALPHA. Color keying is handled in BltOverride using a
4623              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4624              * color key itself is passed to glAlphaFunc in other cases the
4625              * alpha component of pixels that should be masked away is set to 0. */
4626             if (index_in_alpha)
4627                 table[i][3] = i;
4628             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4629                 table[i][3] = 0x00;
4630             else if (pal->flags & WINEDDPCAPS_ALPHA)
4631                 table[i][3] = pal->palents[i].peFlags;
4632             else
4633                 table[i][3] = 0xFF;
4634         }
4635     }
4636 }
4637
4638 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width, UINT height,
4639         UINT outpitch, enum wined3d_conversion_type conversion_type, struct wined3d_surface *surface)
4640 {
4641     const BYTE *source;
4642     BYTE *dest;
4643
4644     TRACE("src %p, dst %p, pitch %u, width %u, height %u, outpitch %u, conversion_type %#x, surface %p.\n",
4645             src, dst, pitch, width, height, outpitch, conversion_type, surface);
4646
4647     switch (conversion_type)
4648     {
4649         case WINED3D_CT_NONE:
4650         {
4651             memcpy(dst, src, pitch * height);
4652             break;
4653         }
4654
4655         case WINED3D_CT_PALETTED:
4656         case WINED3D_CT_PALETTED_CK:
4657         {
4658             BYTE table[256][4];
4659             unsigned int x, y;
4660
4661             d3dfmt_p8_init_palette(surface, table, (conversion_type == WINED3D_CT_PALETTED_CK));
4662
4663             for (y = 0; y < height; y++)
4664             {
4665                 source = src + pitch * y;
4666                 dest = dst + outpitch * y;
4667                 /* This is an 1 bpp format, using the width here is fine */
4668                 for (x = 0; x < width; x++) {
4669                     BYTE color = *source++;
4670                     *dest++ = table[color][0];
4671                     *dest++ = table[color][1];
4672                     *dest++ = table[color][2];
4673                     *dest++ = table[color][3];
4674                 }
4675             }
4676         }
4677         break;
4678
4679         case WINED3D_CT_CK_565:
4680         {
4681             /* Converting the 565 format in 5551 packed to emulate color-keying.
4682
4683               Note : in all these conversion, it would be best to average the averaging
4684                       pixels to get the color of the pixel that will be color-keyed to
4685                       prevent 'color bleeding'. This will be done later on if ever it is
4686                       too visible.
4687
4688               Note2: Nvidia documents say that their driver does not support alpha + color keying
4689                      on the same surface and disables color keying in such a case
4690             */
4691             unsigned int x, y;
4692             const WORD *Source;
4693             WORD *Dest;
4694
4695             TRACE("Color keyed 565\n");
4696
4697             for (y = 0; y < height; y++) {
4698                 Source = (const WORD *)(src + y * pitch);
4699                 Dest = (WORD *) (dst + y * outpitch);
4700                 for (x = 0; x < width; x++ ) {
4701                     WORD color = *Source++;
4702                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4703                     if (!color_in_range(&surface->src_blt_color_key, color))
4704                         *Dest |= 0x0001;
4705                     Dest++;
4706                 }
4707             }
4708         }
4709         break;
4710
4711         case WINED3D_CT_CK_5551:
4712         {
4713             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4714             unsigned int x, y;
4715             const WORD *Source;
4716             WORD *Dest;
4717             TRACE("Color keyed 5551\n");
4718             for (y = 0; y < height; y++) {
4719                 Source = (const WORD *)(src + y * pitch);
4720                 Dest = (WORD *) (dst + y * outpitch);
4721                 for (x = 0; x < width; x++ ) {
4722                     WORD color = *Source++;
4723                     *Dest = color;
4724                     if (!color_in_range(&surface->src_blt_color_key, color))
4725                         *Dest |= (1 << 15);
4726                     else
4727                         *Dest &= ~(1 << 15);
4728                     Dest++;
4729                 }
4730             }
4731         }
4732         break;
4733
4734         case WINED3D_CT_CK_RGB24:
4735         {
4736             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4737             unsigned int x, y;
4738             for (y = 0; y < height; y++)
4739             {
4740                 source = src + pitch * y;
4741                 dest = dst + outpitch * y;
4742                 for (x = 0; x < width; x++) {
4743                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4744                     DWORD dstcolor = color << 8;
4745                     if (!color_in_range(&surface->src_blt_color_key, color))
4746                         dstcolor |= 0xff;
4747                     *(DWORD*)dest = dstcolor;
4748                     source += 3;
4749                     dest += 4;
4750                 }
4751             }
4752         }
4753         break;
4754
4755         case WINED3D_CT_RGB32_888:
4756         {
4757             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4758             unsigned int x, y;
4759             for (y = 0; y < height; y++)
4760             {
4761                 source = src + pitch * y;
4762                 dest = dst + outpitch * y;
4763                 for (x = 0; x < width; x++) {
4764                     DWORD color = 0xffffff & *(const DWORD*)source;
4765                     DWORD dstcolor = color << 8;
4766                     if (!color_in_range(&surface->src_blt_color_key, color))
4767                         dstcolor |= 0xff;
4768                     *(DWORD*)dest = dstcolor;
4769                     source += 4;
4770                     dest += 4;
4771                 }
4772             }
4773         }
4774         break;
4775
4776         case WINED3D_CT_CK_ARGB32:
4777         {
4778             unsigned int x, y;
4779             for (y = 0; y < height; ++y)
4780             {
4781                 source = src + pitch * y;
4782                 dest = dst + outpitch * y;
4783                 for (x = 0; x < width; ++x)
4784                 {
4785                     DWORD color = *(const DWORD *)source;
4786                     if (color_in_range(&surface->src_blt_color_key, color))
4787                         color &= ~0xff000000;
4788                     *(DWORD*)dest = color;
4789                     source += 4;
4790                     dest += 4;
4791                 }
4792             }
4793         }
4794         break;
4795
4796         default:
4797             ERR("Unsupported conversion type %#x.\n", conversion_type);
4798     }
4799     return WINED3D_OK;
4800 }
4801
4802 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4803 {
4804     /* Flip the surface contents */
4805     /* Flip the DC */
4806     {
4807         HDC tmp;
4808         tmp = front->hDC;
4809         front->hDC = back->hDC;
4810         back->hDC = tmp;
4811     }
4812
4813     /* Flip the DIBsection */
4814     {
4815         HBITMAP tmp = front->dib.DIBsection;
4816         front->dib.DIBsection = back->dib.DIBsection;
4817         back->dib.DIBsection = tmp;
4818     }
4819
4820     /* Flip the surface data */
4821     {
4822         void* tmp;
4823
4824         tmp = front->dib.bitmap_data;
4825         front->dib.bitmap_data = back->dib.bitmap_data;
4826         back->dib.bitmap_data = tmp;
4827
4828         tmp = front->resource.allocatedMemory;
4829         front->resource.allocatedMemory = back->resource.allocatedMemory;
4830         back->resource.allocatedMemory = tmp;
4831
4832         tmp = front->resource.heapMemory;
4833         front->resource.heapMemory = back->resource.heapMemory;
4834         back->resource.heapMemory = tmp;
4835     }
4836
4837     /* Flip the PBO */
4838     {
4839         GLuint tmp_pbo = front->pbo;
4840         front->pbo = back->pbo;
4841         back->pbo = tmp_pbo;
4842     }
4843
4844     /* Flip the opengl texture */
4845     {
4846         GLuint tmp;
4847
4848         tmp = back->texture_name;
4849         back->texture_name = front->texture_name;
4850         front->texture_name = tmp;
4851
4852         tmp = back->texture_name_srgb;
4853         back->texture_name_srgb = front->texture_name_srgb;
4854         front->texture_name_srgb = tmp;
4855
4856         tmp = back->rb_multisample;
4857         back->rb_multisample = front->rb_multisample;
4858         front->rb_multisample = tmp;
4859
4860         tmp = back->rb_resolved;
4861         back->rb_resolved = front->rb_resolved;
4862         front->rb_resolved = tmp;
4863
4864         resource_unload(&back->resource);
4865         resource_unload(&front->resource);
4866     }
4867
4868     {
4869         DWORD tmp_flags = back->flags;
4870         back->flags = front->flags;
4871         front->flags = tmp_flags;
4872     }
4873 }
4874
4875 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4876  * pixel copy calls. */
4877 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4878         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4879 {
4880     struct wined3d_device *device = dst_surface->resource.device;
4881     float xrel, yrel;
4882     UINT row;
4883     struct wined3d_context *context;
4884     BOOL upsidedown = FALSE;
4885     RECT dst_rect = *dst_rect_in;
4886
4887     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4888      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4889      */
4890     if(dst_rect.top > dst_rect.bottom) {
4891         UINT tmp = dst_rect.bottom;
4892         dst_rect.bottom = dst_rect.top;
4893         dst_rect.top = tmp;
4894         upsidedown = TRUE;
4895     }
4896
4897     context = context_acquire(device, src_surface);
4898     context_apply_blit_state(context, device);
4899     surface_internal_preload(dst_surface, SRGB_RGB);
4900     ENTER_GL();
4901
4902     /* Bind the target texture */
4903     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4904     if (surface_is_offscreen(src_surface))
4905     {
4906         TRACE("Reading from an offscreen target\n");
4907         upsidedown = !upsidedown;
4908         glReadBuffer(device->offscreenBuffer);
4909     }
4910     else
4911     {
4912         glReadBuffer(surface_get_gl_buffer(src_surface));
4913     }
4914     checkGLcall("glReadBuffer");
4915
4916     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4917     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4918
4919     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4920     {
4921         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4922
4923         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4924             ERR("Texture filtering not supported in direct blit.\n");
4925     }
4926     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4927             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4928     {
4929         ERR("Texture filtering not supported in direct blit\n");
4930     }
4931
4932     if (upsidedown
4933             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4934             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4935     {
4936         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4937
4938         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4939                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4940                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4941                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4942     }
4943     else
4944     {
4945         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4946         /* I have to process this row by row to swap the image,
4947          * otherwise it would be upside down, so stretching in y direction
4948          * doesn't cost extra time
4949          *
4950          * However, stretching in x direction can be avoided if not necessary
4951          */
4952         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4953             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4954             {
4955                 /* Well, that stuff works, but it's very slow.
4956                  * find a better way instead
4957                  */
4958                 UINT col;
4959
4960                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4961                 {
4962                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4963                             dst_rect.left + col /* x offset */, row /* y offset */,
4964                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4965                 }
4966             }
4967             else
4968             {
4969                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4970                         dst_rect.left /* x offset */, row /* y offset */,
4971                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4972             }
4973         }
4974     }
4975     checkGLcall("glCopyTexSubImage2D");
4976
4977     LEAVE_GL();
4978     context_release(context);
4979
4980     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4981      * path is never entered
4982      */
4983     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4984 }
4985
4986 /* Uses the hardware to stretch and flip the image.
      *
      * Outline of what the code below does:
      *  1. The GL read buffer of src_surface (or the device's offscreen buffer)
      *     is copied into a texture: src_surface's own texture, or a freshly
      *     generated one ("backup") when noBackBufferBackup is set.
      *  2. If src_surface belongs to a swapchain and is not its first back
      *     buffer, GL_FRONT is additionally copied into a temporary texture
      *     ("src").
      *  3. The texture that is bound at that point is drawn as a quad of the
      *     destination rectangle's size at the origin of drawBuffer (GL_AUX1,
      *     GL_AUX0 or GL_BACK).
      *  4. The quad is read into dst_surface's texture at dst_rect with
      *     glCopyTexSubImage2D().
      *  5. If drawBuffer is GL_BACK, a full-size quad textured with the copy
      *     from step 1 is drawn over it again.
      *  6. Temporary textures are deleted and dst_surface's SFLAG_INTEXTURE
      *     location is marked up to date.
      *
      * dst_rect_in with top > bottom is swapped on a local copy and toggles the
      * upsidedown flag, as does reading from an offscreen source. */
4987 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4988         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4989 {
4990     struct wined3d_device *device = dst_surface->resource.device;
4991     struct wined3d_swapchain *src_swapchain = NULL;
4992     GLuint src, backup = 0;
4993     float left, right, top, bottom; /* Texture coordinates */
4994     UINT fbwidth = src_surface->resource.width;
4995     UINT fbheight = src_surface->resource.height;
4996     struct wined3d_context *context;
4997     GLenum drawBuffer = GL_BACK;
4998     GLenum texture_target;
4999     BOOL noBackBufferBackup;
5000     BOOL src_offscreen;
5001     BOOL upsidedown = FALSE;
5002     RECT dst_rect = *dst_rect_in;
5003
5004     TRACE("Using hwstretch blit\n");
5005     /* Activate the Proper context for reading from the source surface, set it up for blitting */
5006     context = context_acquire(device, src_surface);
5007     context_apply_blit_state(context, device);
5008     surface_internal_preload(dst_surface, SRGB_RGB);
5009
5010     src_offscreen = surface_is_offscreen(src_surface);
         /* For an offscreen source with FBO offscreen rendering the copy goes into
          * a temporary texture instead of src_surface's own texture (see the
          * glGenTextures() branch below). */
5011     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
5012     if (!noBackBufferBackup && !src_surface->texture_name)
5013     {
5014         /* Get it a description */
5015         surface_internal_preload(src_surface, SRGB_RGB);
5016     }
5017     ENTER_GL();
5018
5019     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
5020      * This way we don't have to wait for the 2nd readback to finish to leave this function.
5021      */
5022     if (context->aux_buffers >= 2)
5023     {
5024         /* Got more than one aux buffer? Use the 2nd aux buffer */
5025         drawBuffer = GL_AUX1;
5026     }
5027     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
5028     {
5029         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5030         drawBuffer = GL_AUX0;
5031     }
5032
5033     if(noBackBufferBackup) {
5034         glGenTextures(1, &backup);
5035         checkGLcall("glGenTextures");
5036         context_bind_texture(context, GL_TEXTURE_2D, backup);
5037         texture_target = GL_TEXTURE_2D;
5038     } else {
5039         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5040          * we are reading from the back buffer, the backup can be used as source texture
5041          */
5042         texture_target = src_surface->texture_target;
5043         context_bind_texture(context, texture_target, src_surface->texture_name);
5044         glEnable(texture_target);
5045         checkGLcall("glEnable(texture_target)");
5046
5047         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5048         src_surface->flags &= ~SFLAG_INTEXTURE;
5049     }
5050
5051     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5052      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5053      */
5054     if(dst_rect.top > dst_rect.bottom) {
5055         UINT tmp = dst_rect.bottom;
5056         dst_rect.bottom = dst_rect.top;
5057         dst_rect.top = tmp;
5058         upsidedown = TRUE;
5059     }
5060
5061     if (src_offscreen)
5062     {
5063         TRACE("Reading from an offscreen target\n");
5064         upsidedown = !upsidedown;
5065         glReadBuffer(device->offscreenBuffer);
5066     }
5067     else
5068     {
5069         glReadBuffer(surface_get_gl_buffer(src_surface));
5070     }
5071
5072     /* TODO: Only back up the part that will be overwritten */
         /* Copies the whole fbwidth x fbheight read buffer into level 0 of the
          * texture bound above. */
5073     glCopyTexSubImage2D(texture_target, 0,
5074                         0, 0 /* read offsets */,
5075                         0, 0,
5076                         fbwidth,
5077                         fbheight);
5078
5079     checkGLcall("glCopyTexSubImage2D");
5080
5081     /* No issue with overriding these - the sampler is dirty due to blit usage */
5082     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5083             wined3d_gl_mag_filter(magLookup, filter));
5084     checkGLcall("glTexParameteri");
5085     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5086             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5087     checkGLcall("glTexParameteri");
5088
         /* Pick the texture to draw from: the copy made above when the source is
          * not a swapchain surface or is the swapchain's first back buffer,
          * otherwise a new temporary texture filled from GL_FRONT. */
5089     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5090         src_swapchain = src_surface->container.u.swapchain;
5091     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5092     {
5093         src = backup ? backup : src_surface->texture_name;
5094     }
5095     else
5096     {
5097         glReadBuffer(GL_FRONT);
5098         checkGLcall("glReadBuffer(GL_FRONT)");
5099
5100         glGenTextures(1, &src);
5101         checkGLcall("glGenTextures(1, &src)");
5102         context_bind_texture(context, GL_TEXTURE_2D, src);
5103
5104         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5105          * out for power of 2 sizes
5106          */
5107         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5108                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5109         checkGLcall("glTexImage2D");
5110         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5111                             0, 0 /* read offsets */,
5112                             0, 0,
5113                             fbwidth,
5114                             fbheight);
5115
5116         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5117         checkGLcall("glTexParameteri");
5118         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5119         checkGLcall("glTexParameteri");
5120
5121         glReadBuffer(GL_BACK);
5122         checkGLcall("glReadBuffer(GL_BACK)");
5123
5124         if(texture_target != GL_TEXTURE_2D) {
5125             glDisable(texture_target);
5126             glEnable(GL_TEXTURE_2D);
5127             texture_target = GL_TEXTURE_2D;
5128         }
5129     }
5130     checkGLcall("glEnd and previous");
5131
         /* Texture coordinates of the source rectangle. The vertical ones are
          * measured from the opposite edge (height - y), and top / bottom are
          * exchanged when the result has to be flipped. */
5132     left = src_rect->left;
5133     right = src_rect->right;
5134
5135     if (!upsidedown)
5136     {
5137         top = src_surface->resource.height - src_rect->top;
5138         bottom = src_surface->resource.height - src_rect->bottom;
5139     }
5140     else
5141     {
5142         top = src_surface->resource.height - src_rect->bottom;
5143         bottom = src_surface->resource.height - src_rect->top;
5144     }
5145
         /* With SFLAG_NORMCOORD set the coordinates are scaled by the power of
          * two texture size. */
5146     if (src_surface->flags & SFLAG_NORMCOORD)
5147     {
5148         left /= src_surface->pow2Width;
5149         right /= src_surface->pow2Width;
5150         top /= src_surface->pow2Height;
5151         bottom /= src_surface->pow2Height;
5152     }
5153
5154     /* draw the source texture stretched and upside down. The correct surface is bound already */
5155     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5156     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5157
         /* Draw into, and afterwards read back from, the scratch buffer. */
5158     context_set_draw_buffer(context, drawBuffer);
5159     glReadBuffer(drawBuffer);
5160
5161     glBegin(GL_QUADS);
5162         /* bottom left */
5163         glTexCoord2f(left, bottom);
5164         glVertex2i(0, 0);
5165
5166         /* top left */
5167         glTexCoord2f(left, top);
5168         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5169
5170         /* top right */
5171         glTexCoord2f(right, top);
5172         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5173
5174         /* bottom right */
5175         glTexCoord2f(right, bottom);
5176         glVertex2i(dst_rect.right - dst_rect.left, 0);
5177     glEnd();
5178     checkGLcall("glEnd and previous");
5179
5180     if (texture_target != dst_surface->texture_target)
5181     {
5182         glDisable(texture_target);
5183         glEnable(dst_surface->texture_target);
5184         texture_target = dst_surface->texture_target;
5185     }
5186
5187     /* Now read the stretched and upside down image into the destination texture */
5188     context_bind_texture(context, texture_target, dst_surface->texture_name);
5189     glCopyTexSubImage2D(texture_target,
5190                         0,
5191                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5192                         0, 0, /* We blitted the image to the origin */
5193                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5194     checkGLcall("glCopyTexSubImage2D");
5195
5196     if(drawBuffer == GL_BACK) {
5197         /* Write the back buffer backup back */
5198         if(backup) {
5199             if(texture_target != GL_TEXTURE_2D) {
5200                 glDisable(texture_target);
5201                 glEnable(GL_TEXTURE_2D);
5202                 texture_target = GL_TEXTURE_2D;
5203             }
5204             context_bind_texture(context, GL_TEXTURE_2D, backup);
5205         }
5206         else
5207         {
5208             if (texture_target != src_surface->texture_target)
5209             {
5210                 glDisable(texture_target);
5211                 glEnable(src_surface->texture_target);
5212                 texture_target = src_surface->texture_target;
5213             }
5214             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5215         }
5216
             /* Full fbwidth x fbheight quad; the texture coordinates are always
              * divided by the power of two size here, independent of
              * SFLAG_NORMCOORD. */
5217         glBegin(GL_QUADS);
5218             /* top left */
5219             glTexCoord2f(0.0f, 0.0f);
5220             glVertex2i(0, fbheight);
5221
5222             /* bottom left */
5223             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5224             glVertex2i(0, 0);
5225
5226             /* bottom right */
5227             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5228                     (float)fbheight / (float)src_surface->pow2Height);
5229             glVertex2i(fbwidth, 0);
5230
5231             /* top right */
5232             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5233             glVertex2i(fbwidth, fbheight);
5234         glEnd();
5235     }
5236     glDisable(texture_target);
5237     checkGLcall("glDisable(texture_target)");
5238
5239     /* Cleanup */
         /* "src" is only deleted when it is the temporary front buffer texture,
          * i.e. neither the surface's own texture nor the backup texture. */
5240     if (src != src_surface->texture_name && src != backup)
5241     {
5242         glDeleteTextures(1, &src);
5243         checkGLcall("glDeleteTextures(1, &src)");
5244     }
5245     if(backup) {
5246         glDeleteTextures(1, &backup);
5247         checkGLcall("glDeleteTextures(1, &backup)");
5248     }
5249
5250     LEAVE_GL();
5251
5252     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5253
5254     context_release(context);
5255
5256     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5257      * path is never entered
5258      */
5259     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5260 }
5261
5262 /* Front buffer coordinates are always full screen coordinates, but our GL
5263  * drawable is limited to the window's client area. The sysmem and texture
5264  * copies do have the full screen size. Note that GL has a bottom-left
5265  * origin, while D3D has a top-left origin. */
5266 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5267 {
5268     UINT drawable_height;
5269
5270     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5271             && surface == surface->container.u.swapchain->front_buffer)
5272     {
5273         POINT offset = {0, 0};
5274         RECT windowsize;
5275
5276         ScreenToClient(window, &offset);
5277         OffsetRect(rect, offset.x, offset.y);
5278
5279         GetClientRect(window, &windowsize);
5280         drawable_height = windowsize.bottom - windowsize.top;
5281     }
5282     else
5283     {
5284         drawable_height = surface->resource.height;
5285     }
5286
5287     rect->top = drawable_height - rect->top;
5288     rect->bottom = drawable_height - rect->bottom;
5289 }
5290
5291 static void surface_blt_to_drawable(const struct wined3d_device *device,
5292         enum wined3d_texture_filter_type filter, BOOL color_key,
5293         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5294         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5295 {
5296     struct wined3d_context *context;
5297     RECT src_rect, dst_rect;
5298
5299     src_rect = *src_rect_in;
5300     dst_rect = *dst_rect_in;
5301
5302     /* Make sure the surface is up-to-date. This should probably use
5303      * surface_load_location() and worry about the destination surface too,
5304      * unless we're overwriting it completely. */
5305     surface_internal_preload(src_surface, SRGB_RGB);
5306
5307     /* Activate the destination context, set it up for blitting */
5308     context = context_acquire(device, dst_surface);
5309     context_apply_blit_state(context, device);
5310
5311     if (!surface_is_offscreen(dst_surface))
5312         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5313
5314     device->blitter->set_shader(device->blit_priv, context, src_surface);
5315
5316     ENTER_GL();
5317
5318     if (color_key)
5319     {
5320         glEnable(GL_ALPHA_TEST);
5321         checkGLcall("glEnable(GL_ALPHA_TEST)");
5322
5323         /* When the primary render target uses P8, the alpha component
5324          * contains the palette index. Which means that the colorkey is one of
5325          * the palette entries. In other cases pixels that should be masked
5326          * away have alpha set to 0. */
5327         if (primary_render_target_is_p8(device))
5328             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5329         else
5330             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5331         checkGLcall("glAlphaFunc");
5332     }
5333     else
5334     {
5335         glDisable(GL_ALPHA_TEST);
5336         checkGLcall("glDisable(GL_ALPHA_TEST)");
5337     }
5338
5339     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5340
5341     if (color_key)
5342     {
5343         glDisable(GL_ALPHA_TEST);
5344         checkGLcall("glDisable(GL_ALPHA_TEST)");
5345     }
5346
5347     LEAVE_GL();
5348
5349     /* Leave the opengl state valid for blitting */
5350     device->blitter->unset_shader(context->gl_info);
5351
5352     if (wined3d_settings.strict_draw_ordering
5353             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5354             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5355         wglFlush(); /* Flush to ensure ordering across contexts. */
5356
5357     context_release(context);
5358 }
5359
5360 /* Do not call while under the GL lock. */
5361 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5362 {
5363     struct wined3d_device *device = s->resource.device;
5364     const struct blit_shader *blitter;
5365
5366     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5367             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5368     if (!blitter)
5369     {
5370         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5371         return WINED3DERR_INVALIDCALL;
5372     }
5373
5374     return blitter->color_fill(device, s, rect, color);
5375 }
5376
5377 /* Do not call while under the GL lock. */
5378 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5379         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5380         enum wined3d_texture_filter_type filter)
5381 {
5382     struct wined3d_device *device = dst_surface->resource.device;
5383     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5384     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5385
5386     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5387             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5388             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5389
5390     /* Get the swapchain. One of the surfaces has to be a primary surface */
5391     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5392     {
5393         WARN("Destination is in sysmem, rejecting gl blt\n");
5394         return WINED3DERR_INVALIDCALL;
5395     }
5396
5397     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5398         dstSwapchain = dst_surface->container.u.swapchain;
5399
5400     if (src_surface)
5401     {
5402         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5403         {
5404             WARN("Src is in sysmem, rejecting gl blt\n");
5405             return WINED3DERR_INVALIDCALL;
5406         }
5407
5408         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5409             srcSwapchain = src_surface->container.u.swapchain;
5410     }
5411
5412     /* Early sort out of cases where no render target is used */
5413     if (!dstSwapchain && !srcSwapchain
5414             && src_surface != device->fb.render_targets[0]
5415             && dst_surface != device->fb.render_targets[0])
5416     {
5417         TRACE("No surface is render target, not using hardware blit.\n");
5418         return WINED3DERR_INVALIDCALL;
5419     }
5420
5421     /* No destination color keying supported */
5422     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5423     {
5424         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5425         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5426         return WINED3DERR_INVALIDCALL;
5427     }
5428
5429     if (dstSwapchain && dstSwapchain == srcSwapchain)
5430     {
5431         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5432         return WINED3DERR_INVALIDCALL;
5433     }
5434
5435     if (dstSwapchain && srcSwapchain)
5436     {
5437         FIXME("Implement hardware blit between two different swapchains\n");
5438         return WINED3DERR_INVALIDCALL;
5439     }
5440
5441     if (dstSwapchain)
5442     {
5443         /* Handled with regular texture -> swapchain blit */
5444         if (src_surface == device->fb.render_targets[0])
5445             TRACE("Blit from active render target to a swapchain\n");
5446     }
5447     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5448     {
5449         FIXME("Implement blit from a swapchain to the active render target\n");
5450         return WINED3DERR_INVALIDCALL;
5451     }
5452
5453     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5454     {
5455         /* Blit from render target to texture */
5456         BOOL stretchx;
5457
5458         /* P8 read back is not implemented */
5459         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5460                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5461         {
5462             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5463             return WINED3DERR_INVALIDCALL;
5464         }
5465
5466         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5467         {
5468             TRACE("Color keying not supported by frame buffer to texture blit\n");
5469             return WINED3DERR_INVALIDCALL;
5470             /* Destination color key is checked above */
5471         }
5472
5473         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5474             stretchx = TRUE;
5475         else
5476             stretchx = FALSE;
5477
5478         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5479          * flip the image nor scale it.
5480          *
5481          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5482          * -> If the app wants a image width an unscaled width, copy it line per line
5483          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5484          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5485          *    back buffer. This is slower than reading line per line, thus not used for flipping
5486          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5487          *    pixel by pixel. */
5488         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5489                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5490         {
5491             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5492             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5493         }
5494         else
5495         {
5496             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5497             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5498         }
5499
5500         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5501         {
5502             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5503             dst_surface->resource.allocatedMemory = NULL;
5504             dst_surface->resource.heapMemory = NULL;
5505         }
5506         else
5507         {
5508             dst_surface->flags &= ~SFLAG_INSYSMEM;
5509         }
5510
5511         return WINED3D_OK;
5512     }
5513     else if (src_surface)
5514     {
5515         /* Blit from offscreen surface to render target */
5516         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5517         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5518
5519         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5520
5521         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5522                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5523                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5524         {
5525             FIXME("Unsupported blit operation falling back to software\n");
5526             return WINED3DERR_INVALIDCALL;
5527         }
5528
5529         /* Color keying: Check if we have to do a color keyed blt,
5530          * and if not check if a color key is activated.
5531          *
5532          * Just modify the color keying parameters in the surface and restore them afterwards
5533          * The surface keeps track of the color key last used to load the opengl surface.
5534          * PreLoad will catch the change to the flags and color key and reload if necessary.
5535          */
5536         if (flags & WINEDDBLT_KEYSRC)
5537         {
5538             /* Use color key from surface */
5539         }
5540         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5541         {
5542             /* Use color key from DDBltFx */
5543             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5544             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5545         }
5546         else
5547         {
5548             /* Do not use color key */
5549             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5550         }
5551
5552         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5553                 src_surface, src_rect, dst_surface, dst_rect);
5554
5555         /* Restore the color key parameters */
5556         src_surface->CKeyFlags = oldCKeyFlags;
5557         src_surface->src_blt_color_key = old_blt_key;
5558
5559         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5560
5561         return WINED3D_OK;
5562     }
5563
5564     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5565     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5566     return WINED3DERR_INVALIDCALL;
5567 }
5568
5569 /* GL locking is done by the caller */
5570 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5571         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5572 {
5573     struct wined3d_device *device = surface->resource.device;
5574     const struct wined3d_gl_info *gl_info = context->gl_info;
5575     GLint compare_mode = GL_NONE;
5576     struct blt_info info;
5577     GLint old_binding = 0;
5578     RECT rect;
5579
5580     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5581
5582     glDisable(GL_CULL_FACE);
5583     glDisable(GL_BLEND);
5584     glDisable(GL_ALPHA_TEST);
5585     glDisable(GL_SCISSOR_TEST);
5586     glDisable(GL_STENCIL_TEST);
5587     glEnable(GL_DEPTH_TEST);
5588     glDepthFunc(GL_ALWAYS);
5589     glDepthMask(GL_TRUE);
5590     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5591     glViewport(x, y, w, h);
5592     glDepthRange(0.0, 1.0);
5593
5594     SetRect(&rect, 0, h, w, 0);
5595     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5596     context_active_texture(context, context->gl_info, 0);
5597     glGetIntegerv(info.binding, &old_binding);
5598     glBindTexture(info.bind_target, texture);
5599     if (gl_info->supported[ARB_SHADOW])
5600     {
5601         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5602         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5603     }
5604
5605     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5606             gl_info, info.tex_type, &surface->ds_current_size);
5607
5608     glBegin(GL_TRIANGLE_STRIP);
5609     glTexCoord3fv(info.coords[0]);
5610     glVertex2f(-1.0f, -1.0f);
5611     glTexCoord3fv(info.coords[1]);
5612     glVertex2f(1.0f, -1.0f);
5613     glTexCoord3fv(info.coords[2]);
5614     glVertex2f(-1.0f, 1.0f);
5615     glTexCoord3fv(info.coords[3]);
5616     glVertex2f(1.0f, 1.0f);
5617     glEnd();
5618
5619     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5620     glBindTexture(info.bind_target, old_binding);
5621
5622     glPopAttrib();
5623
5624     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5625 }
5626
5627 void surface_modify_ds_location(struct wined3d_surface *surface,
5628         DWORD location, UINT w, UINT h)
5629 {
5630     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5631
5632     if (location & ~(SFLAG_LOCATIONS | SFLAG_DISCARDED))
5633         FIXME("Invalid location (%#x) specified.\n", location);
5634
5635     if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5636             || (!(surface->flags & SFLAG_INTEXTURE) && (location & SFLAG_INTEXTURE)))
5637     {
5638         if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5639         {
5640             TRACE("Passing to container.\n");
5641             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5642         }
5643     }
5644
5645     surface->ds_current_size.cx = w;
5646     surface->ds_current_size.cy = h;
5647     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_DISCARDED);
5648     surface->flags |= location;
5649 }
5650
5651 /* Context activation is done by the caller. */
5652 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5653 {
5654     struct wined3d_device *device = surface->resource.device;
5655     GLsizei w, h;
5656
5657     TRACE("surface %p, new location %#x.\n", surface, location);
5658
5659     /* TODO: Make this work for modes other than FBO */
5660     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5661
5662     if (!(surface->flags & location))
5663     {
5664         w = surface->ds_current_size.cx;
5665         h = surface->ds_current_size.cy;
5666         surface->ds_current_size.cx = 0;
5667         surface->ds_current_size.cy = 0;
5668     }
5669     else
5670     {
5671         w = surface->resource.width;
5672         h = surface->resource.height;
5673     }
5674
5675     if (surface->ds_current_size.cx == surface->resource.width
5676             && surface->ds_current_size.cy == surface->resource.height)
5677     {
5678         TRACE("Location (%#x) is already up to date.\n", location);
5679         return;
5680     }
5681
5682     if (surface->current_renderbuffer)
5683     {
5684         FIXME("Not supported with fixed up depth stencil.\n");
5685         return;
5686     }
5687
5688     if (surface->flags & SFLAG_DISCARDED)
5689     {
5690         TRACE("Surface was discarded, no need copy data.\n");
5691         switch (location)
5692         {
5693             case SFLAG_INTEXTURE:
5694                 surface_prepare_texture(surface, context, FALSE);
5695                 break;
5696             case SFLAG_INRB_MULTISAMPLE:
5697                 surface_prepare_rb(surface, context->gl_info, TRUE);
5698                 break;
5699             case SFLAG_INDRAWABLE:
5700                 /* Nothing to do */
5701                 break;
5702             default:
5703                 FIXME("Unhandled location %#x\n", location);
5704         }
5705         surface->flags &= ~SFLAG_DISCARDED;
5706         surface->flags |= location;
5707         surface->ds_current_size.cx = surface->resource.width;
5708         surface->ds_current_size.cy = surface->resource.height;
5709         return;
5710     }
5711
5712     if (!(surface->flags & SFLAG_LOCATIONS))
5713     {
5714         FIXME("No up to date depth stencil location.\n");
5715         surface->flags |= location;
5716         surface->ds_current_size.cx = surface->resource.width;
5717         surface->ds_current_size.cy = surface->resource.height;
5718         return;
5719     }
5720
5721     if (location == SFLAG_INTEXTURE)
5722     {
5723         GLint old_binding = 0;
5724         GLenum bind_target;
5725
5726         /* The render target is allowed to be smaller than the depth/stencil
5727          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5728          * than the offscreen surface. Don't overwrite the offscreen surface
5729          * with undefined data. */
5730         w = min(w, context->swapchain->desc.backbuffer_width);
5731         h = min(h, context->swapchain->desc.backbuffer_height);
5732
5733         TRACE("Copying onscreen depth buffer to depth texture.\n");
5734
5735         ENTER_GL();
5736
5737         if (!device->depth_blt_texture)
5738         {
5739             glGenTextures(1, &device->depth_blt_texture);
5740         }
5741
5742         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5743          * directly on the FBO texture. That's because we need to flip. */
5744         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5745                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5746         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5747         {
5748             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5749             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5750         }
5751         else
5752         {
5753             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5754             bind_target = GL_TEXTURE_2D;
5755         }
5756         glBindTexture(bind_target, device->depth_blt_texture);
5757         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5758          * internal format, because the internal format might include stencil
5759          * data. In principle we should copy stencil data as well, but unless
5760          * the driver supports stencil export it's hard to do, and doesn't
5761          * seem to be needed in practice. If the hardware doesn't support
5762          * writing stencil data, the glCopyTexImage2D() call might trigger
5763          * software fallbacks. */
5764         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5765         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5766         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5767         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5768         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5769         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5770         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5771         glBindTexture(bind_target, old_binding);
5772
5773         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5774                 NULL, surface, SFLAG_INTEXTURE);
5775         context_set_draw_buffer(context, GL_NONE);
5776         glReadBuffer(GL_NONE);
5777
5778         /* Do the actual blit */
5779         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5780         checkGLcall("depth_blt");
5781
5782         context_invalidate_state(context, STATE_FRAMEBUFFER);
5783
5784         LEAVE_GL();
5785
5786         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5787     }
5788     else if (location == SFLAG_INDRAWABLE)
5789     {
5790         TRACE("Copying depth texture to onscreen depth buffer.\n");
5791
5792         ENTER_GL();
5793
5794         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5795                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5796         surface_depth_blt(surface, context, surface->texture_name,
5797                 0, surface->pow2Height - h, w, h, surface->texture_target);
5798         checkGLcall("depth_blt");
5799
5800         context_invalidate_state(context, STATE_FRAMEBUFFER);
5801
5802         LEAVE_GL();
5803
5804         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5805     }
5806     else
5807     {
5808         ERR("Invalid location (%#x) specified.\n", location);
5809     }
5810
5811     surface->flags |= location;
5812     surface->ds_current_size.cx = surface->resource.width;
5813     surface->ds_current_size.cy = surface->resource.height;
5814 }
5815
5816 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5817 {
5818     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5819     struct wined3d_surface *overlay;
5820
5821     TRACE("surface %p, location %s, persistent %#x.\n",
5822             surface, debug_surflocation(location), persistent);
5823
5824     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5825             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5826             && (location & SFLAG_INDRAWABLE))
5827         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5828
5829     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5830             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5831         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5832
5833     if (persistent)
5834     {
5835         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5836                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5837         {
5838             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5839             {
5840                 TRACE("Passing to container.\n");
5841                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5842             }
5843         }
5844         surface->flags &= ~SFLAG_LOCATIONS;
5845         surface->flags |= location;
5846
5847         /* Redraw emulated overlays, if any */
5848         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5849         {
5850             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5851             {
5852                 surface_draw_overlay(overlay);
5853             }
5854         }
5855     }
5856     else
5857     {
5858         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5859         {
5860             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5861             {
5862                 TRACE("Passing to container\n");
5863                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5864             }
5865         }
5866         surface->flags &= ~location;
5867     }
5868
5869     if (!(surface->flags & SFLAG_LOCATIONS))
5870     {
5871         ERR("Surface %p does not have any up to date location.\n", surface);
5872     }
5873 }
5874
5875 static DWORD resource_access_from_location(DWORD location)
5876 {
5877     switch (location)
5878     {
5879         case SFLAG_INSYSMEM:
5880             return WINED3D_RESOURCE_ACCESS_CPU;
5881
5882         case SFLAG_INDRAWABLE:
5883         case SFLAG_INSRGBTEX:
5884         case SFLAG_INTEXTURE:
5885         case SFLAG_INRB_MULTISAMPLE:
5886         case SFLAG_INRB_RESOLVED:
5887             return WINED3D_RESOURCE_ACCESS_GPU;
5888
5889         default:
5890             FIXME("Unhandled location %#x.\n", location);
5891             return 0;
5892     }
5893 }
5894
5895 static void surface_load_sysmem(struct wined3d_surface *surface,
5896         const struct wined3d_gl_info *gl_info, const RECT *rect)
5897 {
5898     surface_prepare_system_memory(surface);
5899
5900     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5901         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5902
5903     /* Download the surface to system memory. */
5904     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5905     {
5906         struct wined3d_device *device = surface->resource.device;
5907         struct wined3d_context *context;
5908
5909         /* TODO: Use already acquired context when possible. */
5910         context = context_acquire(device, NULL);
5911
5912         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5913         surface_download_data(surface, gl_info);
5914
5915         context_release(context);
5916
5917         return;
5918     }
5919
5920     if (surface->flags & SFLAG_INDRAWABLE)
5921     {
5922         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5923                 wined3d_surface_get_pitch(surface));
5924         return;
5925     }
5926
5927     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5928             surface, surface->flags & SFLAG_LOCATIONS);
5929 }
5930
5931 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5932         const struct wined3d_gl_info *gl_info, const RECT *rect)
5933 {
5934     struct wined3d_device *device = surface->resource.device;
5935     enum wined3d_conversion_type convert;
5936     struct wined3d_format format;
5937     UINT byte_count;
5938     BYTE *mem;
5939
5940     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5941     {
5942         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5943         return WINED3DERR_INVALIDCALL;
5944     }
5945
5946     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5947         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5948
5949     if (surface->flags & SFLAG_INTEXTURE)
5950     {
5951         RECT r;
5952
5953         surface_get_rect(surface, rect, &r);
5954         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5955
5956         return WINED3D_OK;
5957     }
5958
5959     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5960     {
5961         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5962          * path through sysmem. */
5963         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5964     }
5965
5966     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5967
5968     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5969      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5970      * called. */
5971     if ((convert != WINED3D_CT_NONE) && (surface->flags & SFLAG_PBO))
5972     {
5973         struct wined3d_context *context;
5974
5975         TRACE("Removing the pbo attached to surface %p.\n", surface);
5976
5977         /* TODO: Use already acquired context when possible. */
5978         context = context_acquire(device, NULL);
5979
5980         surface_remove_pbo(surface, gl_info);
5981
5982         context_release(context);
5983     }
5984
5985     if ((convert != WINED3D_CT_NONE) && surface->resource.allocatedMemory)
5986     {
5987         UINT height = surface->resource.height;
5988         UINT width = surface->resource.width;
5989         UINT src_pitch, dst_pitch;
5990
5991         byte_count = format.conv_byte_count;
5992         src_pitch = wined3d_surface_get_pitch(surface);
5993
5994         /* Stick to the alignment for the converted surface too, makes it
5995          * easier to load the surface. */
5996         dst_pitch = width * byte_count;
5997         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5998
5999         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6000         {
6001             ERR("Out of memory (%u).\n", dst_pitch * height);
6002             return E_OUTOFMEMORY;
6003         }
6004
6005         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
6006                 src_pitch, width, height, dst_pitch, convert, surface);
6007
6008         surface->flags |= SFLAG_CONVERTED;
6009     }
6010     else
6011     {
6012         surface->flags &= ~SFLAG_CONVERTED;
6013         mem = surface->resource.allocatedMemory;
6014         byte_count = format.byte_count;
6015     }
6016
6017     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
6018
6019     /* Don't delete PBO memory. */
6020     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6021         HeapFree(GetProcessHeap(), 0, mem);
6022
6023     return WINED3D_OK;
6024 }
6025
6026 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6027         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6028 {
6029     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6030     struct wined3d_device *device = surface->resource.device;
6031     enum wined3d_conversion_type convert;
6032     struct wined3d_context *context;
6033     UINT width, src_pitch, dst_pitch;
6034     struct wined3d_bo_address data;
6035     struct wined3d_format format;
6036     POINT dst_point = {0, 0};
6037     BYTE *mem;
6038
6039     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6040             && surface_is_offscreen(surface)
6041             && (surface->flags & SFLAG_INDRAWABLE))
6042     {
6043         surface_load_fb_texture(surface, srgb);
6044
6045         return WINED3D_OK;
6046     }
6047
6048     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6049             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6050             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6051                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6052                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6053     {
6054         if (srgb)
6055             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6056                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6057         else
6058             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6059                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6060
6061         return WINED3D_OK;
6062     }
6063
6064     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6065             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6066             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6067                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6068                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6069     {
6070         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6071         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
6072         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6073
6074         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6075                 &rect, surface, dst_location, &rect);
6076
6077         return WINED3D_OK;
6078     }
6079
6080     /* Upload from system memory */
6081
6082     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6083             TRUE /* We will use textures */, &format, &convert);
6084
6085     if (srgb)
6086     {
6087         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6088         {
6089             /* Performance warning... */
6090             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6091             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6092         }
6093     }
6094     else
6095     {
6096         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6097         {
6098             /* Performance warning... */
6099             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6100             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6101         }
6102     }
6103
6104     if (!(surface->flags & SFLAG_INSYSMEM))
6105     {
6106         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6107         /* Lets hope we get it from somewhere... */
6108         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6109     }
6110
6111     /* TODO: Use already acquired context when possible. */
6112     context = context_acquire(device, NULL);
6113
6114     surface_prepare_texture(surface, context, srgb);
6115     surface_bind_and_dirtify(surface, context, srgb);
6116
6117     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6118     {
6119         surface->flags |= SFLAG_GLCKEY;
6120         surface->gl_color_key = surface->src_blt_color_key;
6121     }
6122     else surface->flags &= ~SFLAG_GLCKEY;
6123
6124     width = surface->resource.width;
6125     src_pitch = wined3d_surface_get_pitch(surface);
6126
6127     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6128      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6129      * called. */
6130     if ((convert != WINED3D_CT_NONE || format.convert) && (surface->flags & SFLAG_PBO))
6131     {
6132         TRACE("Removing the pbo attached to surface %p.\n", surface);
6133         surface_remove_pbo(surface, gl_info);
6134     }
6135
6136     if (format.convert)
6137     {
6138         /* This code is entered for texture formats which need a fixup. */
6139         UINT height = surface->resource.height;
6140
6141         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6142         dst_pitch = width * format.conv_byte_count;
6143         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6144
6145         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6146         {
6147             ERR("Out of memory (%u).\n", dst_pitch * height);
6148             context_release(context);
6149             return E_OUTOFMEMORY;
6150         }
6151         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6152         format.byte_count = format.conv_byte_count;
6153         src_pitch = dst_pitch;
6154     }
6155     else if (convert != WINED3D_CT_NONE && surface->resource.allocatedMemory)
6156     {
6157         /* This code is only entered for color keying fixups */
6158         UINT height = surface->resource.height;
6159
6160         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6161         dst_pitch = width * format.conv_byte_count;
6162         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6163
6164         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6165         {
6166             ERR("Out of memory (%u).\n", dst_pitch * height);
6167             context_release(context);
6168             return E_OUTOFMEMORY;
6169         }
6170         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6171                 width, height, dst_pitch, convert, surface);
6172         format.byte_count = format.conv_byte_count;
6173         src_pitch = dst_pitch;
6174     }
6175     else
6176     {
6177         mem = surface->resource.allocatedMemory;
6178     }
6179
6180     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6181     data.addr = mem;
6182     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6183
6184     context_release(context);
6185
6186     /* Don't delete PBO memory. */
6187     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6188         HeapFree(GetProcessHeap(), 0, mem);
6189
6190     return WINED3D_OK;
6191 }
6192
6193 static void surface_multisample_resolve(struct wined3d_surface *surface)
6194 {
6195     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6196
6197     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6198         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6199
6200     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6201             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6202 }
6203
6204 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6205 {
6206     struct wined3d_device *device = surface->resource.device;
6207     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6208     HRESULT hr;
6209
6210     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6211
6212     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6213     {
6214         if (location == SFLAG_INTEXTURE)
6215         {
6216             struct wined3d_context *context = context_acquire(device, NULL);
6217             surface_load_ds_location(surface, context, location);
6218             context_release(context);
6219             return WINED3D_OK;
6220         }
6221         else
6222         {
6223             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6224             return WINED3DERR_INVALIDCALL;
6225         }
6226     }
6227
6228     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6229         location = SFLAG_INTEXTURE;
6230
6231     if (surface->flags & location)
6232     {
6233         TRACE("Location already up to date.\n");
6234
6235         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6236                 && surface_need_pbo(surface, gl_info))
6237             surface_load_pbo(surface, gl_info);
6238
6239         return WINED3D_OK;
6240     }
6241
6242     if (WARN_ON(d3d_surface))
6243     {
6244         DWORD required_access = resource_access_from_location(location);
6245         if ((surface->resource.access_flags & required_access) != required_access)
6246             WARN("Operation requires %#x access, but surface only has %#x.\n",
6247                     required_access, surface->resource.access_flags);
6248     }
6249
6250     if (!(surface->flags & SFLAG_LOCATIONS))
6251     {
6252         ERR("Surface %p does not have any up to date location.\n", surface);
6253         surface->flags |= SFLAG_LOST;
6254         return WINED3DERR_DEVICELOST;
6255     }
6256
6257     switch (location)
6258     {
6259         case SFLAG_INSYSMEM:
6260             surface_load_sysmem(surface, gl_info, rect);
6261             break;
6262
6263         case SFLAG_INDRAWABLE:
6264             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6265                 return hr;
6266             break;
6267
6268         case SFLAG_INRB_RESOLVED:
6269             surface_multisample_resolve(surface);
6270             break;
6271
6272         case SFLAG_INTEXTURE:
6273         case SFLAG_INSRGBTEX:
6274             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6275                 return hr;
6276             break;
6277
6278         default:
6279             ERR("Don't know how to handle location %#x.\n", location);
6280             break;
6281     }
6282
6283     if (!rect)
6284     {
6285         surface->flags |= location;
6286
6287         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6288             surface_evict_sysmem(surface);
6289     }
6290
6291     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6292             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6293     {
6294         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6295     }
6296
6297     return WINED3D_OK;
6298 }
6299
6300 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6301 {
6302     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6303
6304     /* Not on a swapchain - must be offscreen */
6305     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6306
6307     /* The front buffer is always onscreen */
6308     if (surface == swapchain->front_buffer) return FALSE;
6309
6310     /* If the swapchain is rendered to an FBO, the backbuffer is
6311      * offscreen, otherwise onscreen */
6312     return swapchain->render_to_fbo;
6313 }
6314
6315 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6318
6319 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6320 /* Context activation is done by the caller. */
6321 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6322 {
6323     BYTE table[256][4];
6324     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6325
6326     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6327
6328     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6329     ENTER_GL();
6330     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6331     LEAVE_GL();
6332 }
6333
6334 /* Context activation is done by the caller. */
6335 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6336 {
6337     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6338
6339     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6340      * else the surface is converted in software at upload time in LoadLocation.
6341      */
6342     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6343             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6344         ffp_blit_p8_upload_palette(surface, context->gl_info);
6345
6346     ENTER_GL();
6347     glEnable(surface->texture_target);
6348     checkGLcall("glEnable(surface->texture_target)");
6349     LEAVE_GL();
6350     return WINED3D_OK;
6351 }
6352
6353 /* Context activation is done by the caller. */
6354 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6355 {
6356     ENTER_GL();
6357     glDisable(GL_TEXTURE_2D);
6358     checkGLcall("glDisable(GL_TEXTURE_2D)");
6359     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6360     {
6361         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6362         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6363     }
6364     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6365     {
6366         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6367         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6368     }
6369     LEAVE_GL();
6370 }
6371
6372 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6373         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6374         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6375 {
6376     enum complex_fixup src_fixup;
6377
6378     switch (blit_op)
6379     {
6380         case WINED3D_BLIT_OP_COLOR_BLIT:
6381             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6382                 return FALSE;
6383
6384             src_fixup = get_complex_fixup(src_format->color_fixup);
6385             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6386             {
6387                 TRACE("Checking support for fixup:\n");
6388                 dump_color_fixup_desc(src_format->color_fixup);
6389             }
6390
6391             if (!is_identity_fixup(dst_format->color_fixup))
6392             {
6393                 TRACE("Destination fixups are not supported\n");
6394                 return FALSE;
6395             }
6396
6397             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6398             {
6399                 TRACE("P8 fixup supported\n");
6400                 return TRUE;
6401             }
6402
6403             /* We only support identity conversions. */
6404             if (is_identity_fixup(src_format->color_fixup))
6405             {
6406                 TRACE("[OK]\n");
6407                 return TRUE;
6408             }
6409
6410             TRACE("[FAILED]\n");
6411             return FALSE;
6412
6413         case WINED3D_BLIT_OP_COLOR_FILL:
6414             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6415                 return FALSE;
6416
6417             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6418             {
6419                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6420                     return FALSE;
6421             }
6422             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6423             {
6424                 TRACE("Color fill not supported\n");
6425                 return FALSE;
6426             }
6427
6428             /* FIXME: We should reject color fills on formats with fixups,
6429              * but this would break P8 color fills for example. */
6430
6431             return TRUE;
6432
6433         case WINED3D_BLIT_OP_DEPTH_FILL:
6434             return TRUE;
6435
6436         default:
6437             TRACE("Unsupported blit_op=%d\n", blit_op);
6438             return FALSE;
6439     }
6440 }
6441
6442 /* Do not call while under the GL lock. */
6443 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6444         const RECT *dst_rect, const struct wined3d_color *color)
6445 {
6446     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6447     struct wined3d_fb_state fb = {&dst_surface, NULL};
6448
6449     return device_clear_render_targets(device, 1, &fb,
6450             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6451 }
6452
6453 /* Do not call while under the GL lock. */
6454 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6455         struct wined3d_surface *surface, const RECT *rect, float depth)
6456 {
6457     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6458     struct wined3d_fb_state fb = {NULL, surface};
6459
6460     return device_clear_render_targets(device, 0, &fb,
6461             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6462 }
6463
6464 const struct blit_shader ffp_blit =  {
6465     ffp_blit_alloc,
6466     ffp_blit_free,
6467     ffp_blit_set,
6468     ffp_blit_unset,
6469     ffp_blit_supported,
6470     ffp_blit_color_fill,
6471     ffp_blit_depth_fill,
6472 };
6473
6474 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6475 {
6476     return WINED3D_OK;
6477 }
6478
/* Counterpart of cpu_blit_alloc().
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Nothing was allocated, nothing to free. */
}
6483
6484 /* Context activation is done by the caller. */
6485 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6486 {
6487     return WINED3D_OK;
6488 }
6489
/* Counterpart of cpu_blit_set().
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* No state was set, nothing to undo. */
}
6494
6495 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6496         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6497         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6498 {
6499     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6500     {
6501         return TRUE;
6502     }
6503
6504     return FALSE;
6505 }
6506
6507 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6508         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6509         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6510 {
6511     UINT row_block_count;
6512     const BYTE *src_row;
6513     BYTE *dst_row;
6514     UINT x, y;
6515
6516     src_row = src_data;
6517     dst_row = dst_data;
6518
6519     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6520
6521     if (!flags)
6522     {
6523         for (y = 0; y < update_h; y += format->block_height)
6524         {
6525             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6526             src_row += src_pitch;
6527             dst_row += dst_pitch;
6528         }
6529
6530         return WINED3D_OK;
6531     }
6532
6533     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6534     {
6535         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6536
6537         switch (format->id)
6538         {
6539             case WINED3DFMT_DXT1:
6540                 for (y = 0; y < update_h; y += format->block_height)
6541                 {
6542                     struct block
6543                     {
6544                         WORD color[2];
6545                         BYTE control_row[4];
6546                     };
6547
6548                     const struct block *s = (const struct block *)src_row;
6549                     struct block *d = (struct block *)dst_row;
6550
6551                     for (x = 0; x < row_block_count; ++x)
6552                     {
6553                         d[x].color[0] = s[x].color[0];
6554                         d[x].color[1] = s[x].color[1];
6555                         d[x].control_row[0] = s[x].control_row[3];
6556                         d[x].control_row[1] = s[x].control_row[2];
6557                         d[x].control_row[2] = s[x].control_row[1];
6558                         d[x].control_row[3] = s[x].control_row[0];
6559                     }
6560                     src_row -= src_pitch;
6561                     dst_row += dst_pitch;
6562                 }
6563                 return WINED3D_OK;
6564
6565             case WINED3DFMT_DXT3:
6566                 for (y = 0; y < update_h; y += format->block_height)
6567                 {
6568                     struct block
6569                     {
6570                         WORD alpha_row[4];
6571                         WORD color[2];
6572                         BYTE control_row[4];
6573                     };
6574
6575                     const struct block *s = (const struct block *)src_row;
6576                     struct block *d = (struct block *)dst_row;
6577
6578                     for (x = 0; x < row_block_count; ++x)
6579                     {
6580                         d[x].alpha_row[0] = s[x].alpha_row[3];
6581                         d[x].alpha_row[1] = s[x].alpha_row[2];
6582                         d[x].alpha_row[2] = s[x].alpha_row[1];
6583                         d[x].alpha_row[3] = s[x].alpha_row[0];
6584                         d[x].color[0] = s[x].color[0];
6585                         d[x].color[1] = s[x].color[1];
6586                         d[x].control_row[0] = s[x].control_row[3];
6587                         d[x].control_row[1] = s[x].control_row[2];
6588                         d[x].control_row[2] = s[x].control_row[1];
6589                         d[x].control_row[3] = s[x].control_row[0];
6590                     }
6591                     src_row -= src_pitch;
6592                     dst_row += dst_pitch;
6593                 }
6594                 return WINED3D_OK;
6595
6596             default:
6597                 FIXME("Compressed flip not implemented for format %s.\n",
6598                         debug_d3dformat(format->id));
6599                 return E_NOTIMPL;
6600         }
6601     }
6602
6603     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6604             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6605
6606     return E_NOTIMPL;
6607 }
6608
6609 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6610         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6611         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6612 {
6613     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6614     const struct wined3d_format *src_format, *dst_format;
6615     struct wined3d_surface *orig_src = src_surface;
6616     struct wined3d_mapped_rect dst_map, src_map;
6617     const BYTE *sbase = NULL;
6618     HRESULT hr = WINED3D_OK;
6619     const BYTE *sbuf;
6620     BYTE *dbuf;
6621     int x, y;
6622
6623     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6624             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6625             flags, fx, debug_d3dtexturefiltertype(filter));
6626
6627     if (src_surface == dst_surface)
6628     {
6629         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6630         src_map = dst_map;
6631         src_format = dst_surface->resource.format;
6632         dst_format = src_format;
6633     }
6634     else
6635     {
6636         dst_format = dst_surface->resource.format;
6637         if (src_surface)
6638         {
6639             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6640             {
6641                 src_surface = surface_convert_format(src_surface, dst_format->id);
6642                 if (!src_surface)
6643                 {
6644                     /* The conv function writes a FIXME */
6645                     WARN("Cannot convert source surface format to dest format.\n");
6646                     goto release;
6647                 }
6648             }
6649             wined3d_surface_map(src_surface, &src_map, NULL, WINED3DLOCK_READONLY);
6650             src_format = src_surface->resource.format;
6651         }
6652         else
6653         {
6654             src_format = dst_format;
6655         }
6656
6657         wined3d_surface_map(dst_surface, &dst_map, dst_rect, 0);
6658     }
6659
6660     bpp = dst_surface->resource.format->byte_count;
6661     srcheight = src_rect->bottom - src_rect->top;
6662     srcwidth = src_rect->right - src_rect->left;
6663     dstheight = dst_rect->bottom - dst_rect->top;
6664     dstwidth = dst_rect->right - dst_rect->left;
6665     width = (dst_rect->right - dst_rect->left) * bpp;
6666
6667     if (src_surface)
6668         sbase = (BYTE *)src_map.data
6669                 + ((src_rect->top / src_format->block_height) * src_map.row_pitch)
6670                 + ((src_rect->left / src_format->block_width) * src_format->block_byte_count);
6671     if (src_surface != dst_surface)
6672         dbuf = dst_map.data;
6673     else
6674         dbuf = (BYTE *)dst_map.data
6675                 + ((dst_rect->top / dst_format->block_height) * dst_map.row_pitch)
6676                 + ((dst_rect->left / dst_format->block_width) * dst_format->block_byte_count);
6677
6678     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6679     {
6680         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6681
6682         if (src_surface == dst_surface)
6683         {
6684             FIXME("Only plain blits supported on compressed surfaces.\n");
6685             hr = E_NOTIMPL;
6686             goto release;
6687         }
6688
6689         if (srcheight != dstheight || srcwidth != dstwidth)
6690         {
6691             WARN("Stretching not supported on compressed surfaces.\n");
6692             hr = WINED3DERR_INVALIDCALL;
6693             goto release;
6694         }
6695
6696         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6697         {
6698             WARN("Rectangle not block-aligned.\n");
6699             hr = WINED3DERR_INVALIDCALL;
6700             goto release;
6701         }
6702
6703         hr = surface_cpu_blt_compressed(sbase, dbuf,
6704                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6705                 src_format, flags, fx);
6706         goto release;
6707     }
6708
6709     /* First, all the 'source-less' blits */
6710     if (flags & WINEDDBLT_COLORFILL)
6711     {
6712         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6713         flags &= ~WINEDDBLT_COLORFILL;
6714     }
6715
6716     if (flags & WINEDDBLT_DEPTHFILL)
6717     {
6718         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6719     }
6720     if (flags & WINEDDBLT_ROP)
6721     {
6722         /* Catch some degenerate cases here. */
6723         switch (fx->dwROP)
6724         {
6725             case BLACKNESS:
6726                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6727                 break;
6728             case 0xAA0029: /* No-op */
6729                 break;
6730             case WHITENESS:
6731                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6732                 break;
6733             case SRCCOPY: /* Well, we do that below? */
6734                 break;
6735             default:
6736                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6737                 goto error;
6738         }
6739         flags &= ~WINEDDBLT_ROP;
6740     }
6741     if (flags & WINEDDBLT_DDROPS)
6742     {
6743         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6744     }
6745     /* Now the 'with source' blits. */
6746     if (src_surface)
6747     {
6748         int sx, xinc, sy, yinc;
6749
6750         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6751             goto release;
6752
6753         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6754                 && (srcwidth != dstwidth || srcheight != dstheight))
6755         {
6756             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6757             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6758         }
6759
6760         xinc = (srcwidth << 16) / dstwidth;
6761         yinc = (srcheight << 16) / dstheight;
6762
6763         if (!flags)
6764         {
6765             /* No effects, we can cheat here. */
6766             if (dstwidth == srcwidth)
6767             {
6768                 if (dstheight == srcheight)
6769                 {
6770                     /* No stretching in either direction. This needs to be as
6771                      * fast as possible. */
6772                     sbuf = sbase;
6773
6774                     /* Check for overlapping surfaces. */
6775                     if (src_surface != dst_surface || dst_rect->top < src_rect->top
6776                             || dst_rect->right <= src_rect->left || src_rect->right <= dst_rect->left)
6777                     {
6778                         /* No overlap, or dst above src, so copy from top downwards. */
6779                         for (y = 0; y < dstheight; ++y)
6780                         {
6781                             memcpy(dbuf, sbuf, width);
6782                             sbuf += src_map.row_pitch;
6783                             dbuf += dst_map.row_pitch;
6784                         }
6785                     }
6786                     else if (dst_rect->top > src_rect->top)
6787                     {
6788                         /* Copy from bottom upwards. */
6789                         sbuf += src_map.row_pitch * dstheight;
6790                         dbuf += dst_map.row_pitch * dstheight;
6791                         for (y = 0; y < dstheight; ++y)
6792                         {
6793                             sbuf -= src_map.row_pitch;
6794                             dbuf -= dst_map.row_pitch;
6795                             memcpy(dbuf, sbuf, width);
6796                         }
6797                     }
6798                     else
6799                     {
6800                         /* Src and dst overlapping on the same line, use memmove. */
6801                         for (y = 0; y < dstheight; ++y)
6802                         {
6803                             memmove(dbuf, sbuf, width);
6804                             sbuf += src_map.row_pitch;
6805                             dbuf += dst_map.row_pitch;
6806                         }
6807                     }
6808                 }
6809                 else
6810                 {
6811                     /* Stretching in y direction only. */
6812                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6813                     {
6814                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6815                         memcpy(dbuf, sbuf, width);
6816                         dbuf += dst_map.row_pitch;
6817                     }
6818                 }
6819             }
6820             else
6821             {
6822                 /* Stretching in X direction. */
6823                 int last_sy = -1;
6824                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6825                 {
6826                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6827
6828                     if ((sy >> 16) == (last_sy >> 16))
6829                     {
6830                         /* This source row is the same as last source row -
6831                          * Copy the already stretched row. */
6832                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6833                     }
6834                     else
6835                     {
6836 #define STRETCH_ROW(type) \
6837 do { \
6838     const type *s = (const type *)sbuf; \
6839     type *d = (type *)dbuf; \
6840     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6841         d[x] = s[sx >> 16]; \
6842 } while(0)
6843
6844                         switch(bpp)
6845                         {
6846                             case 1:
6847                                 STRETCH_ROW(BYTE);
6848                                 break;
6849                             case 2:
6850                                 STRETCH_ROW(WORD);
6851                                 break;
6852                             case 4:
6853                                 STRETCH_ROW(DWORD);
6854                                 break;
6855                             case 3:
6856                             {
6857                                 const BYTE *s;
6858                                 BYTE *d = dbuf;
6859                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6860                                 {
6861                                     DWORD pixel;
6862
6863                                     s = sbuf + 3 * (sx >> 16);
6864                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6865                                     d[0] = (pixel      ) & 0xff;
6866                                     d[1] = (pixel >>  8) & 0xff;
6867                                     d[2] = (pixel >> 16) & 0xff;
6868                                     d += 3;
6869                                 }
6870                                 break;
6871                             }
6872                             default:
6873                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6874                                 hr = WINED3DERR_NOTAVAILABLE;
6875                                 goto error;
6876                         }
6877 #undef STRETCH_ROW
6878                     }
6879                     dbuf += dst_map.row_pitch;
6880                     last_sy = sy;
6881                 }
6882             }
6883         }
6884         else
6885         {
6886             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6887             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6888             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6889             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6890             {
6891                 /* The color keying flags are checked for correctness in ddraw */
6892                 if (flags & WINEDDBLT_KEYSRC)
6893                 {
6894                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6895                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6896                 }
6897                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6898                 {
6899                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6900                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6901                 }
6902
6903                 if (flags & WINEDDBLT_KEYDEST)
6904                 {
6905                     /* Destination color keys are taken from the source surface! */
6906                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6907                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6908                 }
6909                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6910                 {
6911                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6912                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6913                 }
6914
6915                 if (bpp == 1)
6916                 {
6917                     keymask = 0xff;
6918                 }
6919                 else
6920                 {
6921                     keymask = src_format->red_mask
6922                             | src_format->green_mask
6923                             | src_format->blue_mask;
6924                 }
6925                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6926             }
6927
6928             if (flags & WINEDDBLT_DDFX)
6929             {
6930                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6931                 LONG tmpxy;
6932                 dTopLeft     = dbuf;
6933                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6934                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6935                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6936
6937                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6938                 {
6939                     /* I don't think we need to do anything about this flag */
6940                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6941                 }
6942                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6943                 {
6944                     tmp          = dTopRight;
6945                     dTopRight    = dTopLeft;
6946                     dTopLeft     = tmp;
6947                     tmp          = dBottomRight;
6948                     dBottomRight = dBottomLeft;
6949                     dBottomLeft  = tmp;
6950                     dstxinc = dstxinc * -1;
6951                 }
6952                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6953                 {
6954                     tmp          = dTopLeft;
6955                     dTopLeft     = dBottomLeft;
6956                     dBottomLeft  = tmp;
6957                     tmp          = dTopRight;
6958                     dTopRight    = dBottomRight;
6959                     dBottomRight = tmp;
6960                     dstyinc = dstyinc * -1;
6961                 }
6962                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6963                 {
6964                     /* I don't think we need to do anything about this flag */
6965                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6966                 }
6967                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6968                 {
6969                     tmp          = dBottomRight;
6970                     dBottomRight = dTopLeft;
6971                     dTopLeft     = tmp;
6972                     tmp          = dBottomLeft;
6973                     dBottomLeft  = dTopRight;
6974                     dTopRight    = tmp;
6975                     dstxinc = dstxinc * -1;
6976                     dstyinc = dstyinc * -1;
6977                 }
6978                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6979                 {
6980                     tmp          = dTopLeft;
6981                     dTopLeft     = dBottomLeft;
6982                     dBottomLeft  = dBottomRight;
6983                     dBottomRight = dTopRight;
6984                     dTopRight    = tmp;
6985                     tmpxy   = dstxinc;
6986                     dstxinc = dstyinc;
6987                     dstyinc = tmpxy;
6988                     dstxinc = dstxinc * -1;
6989                 }
6990                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6991                 {
6992                     tmp          = dTopLeft;
6993                     dTopLeft     = dTopRight;
6994                     dTopRight    = dBottomRight;
6995                     dBottomRight = dBottomLeft;
6996                     dBottomLeft  = tmp;
6997                     tmpxy   = dstxinc;
6998                     dstxinc = dstyinc;
6999                     dstyinc = tmpxy;
7000                     dstyinc = dstyinc * -1;
7001                 }
7002                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
7003                 {
7004                     /* I don't think we need to do anything about this flag */
7005                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
7006                 }
7007                 dbuf = dTopLeft;
7008                 flags &= ~(WINEDDBLT_DDFX);
7009             }
7010
7011 #define COPY_COLORKEY_FX(type) \
7012 do { \
7013     const type *s; \
7014     type *d = (type *)dbuf, *dx, tmp; \
7015     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7016     { \
7017         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7018         dx = d; \
7019         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7020         { \
7021             tmp = s[sx >> 16]; \
7022             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7023                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7024             { \
7025                 dx[0] = tmp; \
7026             } \
7027             dx = (type *)(((BYTE *)dx) + dstxinc); \
7028         } \
7029         d = (type *)(((BYTE *)d) + dstyinc); \
7030     } \
7031 } while(0)
7032
7033             switch (bpp)
7034             {
7035                 case 1:
7036                     COPY_COLORKEY_FX(BYTE);
7037                     break;
7038                 case 2:
7039                     COPY_COLORKEY_FX(WORD);
7040                     break;
7041                 case 4:
7042                     COPY_COLORKEY_FX(DWORD);
7043                     break;
7044                 case 3:
7045                 {
7046                     const BYTE *s;
7047                     BYTE *d = dbuf, *dx;
7048                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7049                     {
7050                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7051                         dx = d;
7052                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7053                         {
7054                             DWORD pixel, dpixel = 0;
7055                             s = sbuf + 3 * (sx>>16);
7056                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7057                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7058                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7059                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7060                             {
7061                                 dx[0] = (pixel      ) & 0xff;
7062                                 dx[1] = (pixel >>  8) & 0xff;
7063                                 dx[2] = (pixel >> 16) & 0xff;
7064                             }
7065                             dx += dstxinc;
7066                         }
7067                         d += dstyinc;
7068                     }
7069                     break;
7070                 }
7071                 default:
7072                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7073                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7074                     hr = WINED3DERR_NOTAVAILABLE;
7075                     goto error;
7076 #undef COPY_COLORKEY_FX
7077             }
7078         }
7079     }
7080
7081 error:
7082     if (flags && FIXME_ON(d3d_surface))
7083     {
7084         FIXME("\tUnsupported flags: %#x.\n", flags);
7085     }
7086
7087 release:
7088     wined3d_surface_unmap(dst_surface);
7089     if (src_surface && src_surface != dst_surface)
7090         wined3d_surface_unmap(src_surface);
7091     /* Release the converted surface, if any. */
7092     if (src_surface && src_surface != orig_src)
7093         wined3d_surface_decref(src_surface);
7094
7095     return hr;
7096 }
7097
7098 /* Do not call while under the GL lock. */
7099 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7100         const RECT *dst_rect, const struct wined3d_color *color)
7101 {
7102     static const RECT src_rect;
7103     WINEDDBLTFX BltFx;
7104
7105     memset(&BltFx, 0, sizeof(BltFx));
7106     BltFx.dwSize = sizeof(BltFx);
7107     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7108     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7109             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7110 }
7111
7112 /* Do not call while under the GL lock. */
7113 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7114         struct wined3d_surface *surface, const RECT *rect, float depth)
7115 {
7116     FIXME("Depth filling not implemented by cpu_blit.\n");
7117     return WINED3DERR_INVALIDCALL;
7118 }
7119
7120 const struct blit_shader cpu_blit =  {
7121     cpu_blit_alloc,
7122     cpu_blit_free,
7123     cpu_blit_set,
7124     cpu_blit_unset,
7125     cpu_blit_supported,
7126     cpu_blit_color_fill,
7127     cpu_blit_depth_fill,
7128 };
7129
7130 static HRESULT surface_init(struct wined3d_surface *surface, enum wined3d_surface_type surface_type, UINT alignment,
7131         UINT width, UINT height, UINT level, enum wined3d_multisample_type multisample_type,
7132         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7133         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7134 {
7135     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7136     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7137     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7138     unsigned int resource_size;
7139     HRESULT hr;
7140
7141     if (multisample_quality > 0)
7142     {
7143         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7144         multisample_quality = 0;
7145     }
7146
7147     /* Quick lockable sanity check.
7148      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7149      * this function is too deep to need to care about things like this.
7150      * Levels need to be checked too, since they all affect what can be done. */
7151     switch (pool)
7152     {
7153         case WINED3D_POOL_SCRATCH:
7154             if (!lockable)
7155             {
7156                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7157                         "which are mutually exclusive, setting lockable to TRUE.\n");
7158                 lockable = TRUE;
7159             }
7160             break;
7161
7162         case WINED3D_POOL_SYSTEM_MEM:
7163             if (!lockable)
7164                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7165             break;
7166
7167         case WINED3D_POOL_MANAGED:
7168             if (usage & WINED3DUSAGE_DYNAMIC)
7169                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7170             break;
7171
7172         case WINED3D_POOL_DEFAULT:
7173             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7174                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7175             break;
7176
7177         default:
7178             FIXME("Unknown pool %#x.\n", pool);
7179             break;
7180     };
7181
7182     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7183         FIXME("Trying to create a render target that isn't in the default pool.\n");
7184
7185     /* FIXME: Check that the format is supported by the device. */
7186
7187     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7188     if (!resource_size)
7189         return WINED3DERR_INVALIDCALL;
7190
7191     surface->surface_type = surface_type;
7192
7193     switch (surface_type)
7194     {
7195         case WINED3D_SURFACE_TYPE_OPENGL:
7196             surface->surface_ops = &surface_ops;
7197             break;
7198
7199         case WINED3D_SURFACE_TYPE_GDI:
7200             surface->surface_ops = &gdi_surface_ops;
7201             break;
7202
7203         default:
7204             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7205             return WINED3DERR_INVALIDCALL;
7206     }
7207
7208     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7209             multisample_type, multisample_quality, usage, pool, width, height, 1,
7210             resource_size, parent, parent_ops, &surface_resource_ops);
7211     if (FAILED(hr))
7212     {
7213         WARN("Failed to initialize resource, returning %#x.\n", hr);
7214         return hr;
7215     }
7216
7217     /* "Standalone" surface. */
7218     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7219
7220     surface->texture_level = level;
7221     list_init(&surface->overlays);
7222
7223     /* Flags */
7224     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7225     if (flags & WINED3D_SURFACE_DISCARD)
7226         surface->flags |= SFLAG_DISCARD;
7227     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7228         surface->flags |= SFLAG_PIN_SYSMEM;
7229     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7230         surface->flags |= SFLAG_LOCKABLE;
7231     /* I'm not sure if this qualifies as a hack or as an optimization. It
7232      * seems reasonable to assume that lockable render targets will get
7233      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7234      * creation. However, the other reason we want to do this is that several
7235      * ddraw applications access surface memory while the surface isn't
7236      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7237      * future locks prevents these from crashing. */
7238     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7239         surface->flags |= SFLAG_DYNLOCK;
7240
7241     /* Mark the texture as dirty so that it gets loaded first time around. */
7242     surface_add_dirty_rect(surface, NULL);
7243     list_init(&surface->renderbuffers);
7244
7245     TRACE("surface %p, memory %p, size %u\n",
7246             surface, surface->resource.allocatedMemory, surface->resource.size);
7247
7248     /* Call the private setup routine */
7249     hr = surface->surface_ops->surface_private_setup(surface);
7250     if (FAILED(hr))
7251     {
7252         ERR("Private setup failed, returning %#x\n", hr);
7253         surface_cleanup(surface);
7254         return hr;
7255     }
7256
7257     /* Similar to lockable rendertargets above, creating the DIB section
7258      * during surface initialization prevents the sysmem pointer from changing
7259      * after a wined3d_surface_getdc() call. */
7260     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7261             && SUCCEEDED(surface_create_dib_section(surface)))
7262     {
7263         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7264         surface->resource.heapMemory = NULL;
7265         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7266     }
7267
7268     return hr;
7269 }
7270
7271 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7272         enum wined3d_format_id format_id, UINT level, DWORD usage, enum wined3d_pool pool,
7273         enum wined3d_multisample_type multisample_type, DWORD multisample_quality,
7274         enum wined3d_surface_type surface_type, DWORD flags, void *parent,
7275         const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7276 {
7277     struct wined3d_surface *object;
7278     HRESULT hr;
7279
7280     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7281             device, width, height, debug_d3dformat(format_id), level);
7282     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7283             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7284     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7285
7286     if (surface_type == WINED3D_SURFACE_TYPE_OPENGL && !device->adapter)
7287     {
7288         ERR("OpenGL surfaces are not available without OpenGL.\n");
7289         return WINED3DERR_NOTAVAILABLE;
7290     }
7291
7292     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7293     if (!object)
7294     {
7295         ERR("Failed to allocate surface memory.\n");
7296         return WINED3DERR_OUTOFVIDEOMEMORY;
7297     }
7298
7299     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7300             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7301     if (FAILED(hr))
7302     {
7303         WARN("Failed to initialize surface, returning %#x.\n", hr);
7304         HeapFree(GetProcessHeap(), 0, object);
7305         return hr;
7306     }
7307
7308     TRACE("Created surface %p.\n", object);
7309     *surface = object;
7310
7311     return hr;
7312 }