wined3d: Only update the drawable on surface unmap for the frontbuffer.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         enum wined3d_texture_filter_type filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
164 struct blt_info
165 {
166     GLenum binding;
167     GLenum bind_target;
168     enum tex_types tex_type;
169     GLfloat coords[4][3];
170 };
171
/* Rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380     UINT usage;
381     HDC dc;
382
383     TRACE("surface %p.\n", surface);
384
385     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
386     {
387         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
388         return WINED3DERR_INVALIDCALL;
389     }
390
391     switch (format->byte_count)
392     {
393         case 2:
394         case 4:
395             /* Allocate extra space to store the RGB bit masks. */
396             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
397             break;
398
399         case 3:
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
401             break;
402
403         default:
404             /* Allocate extra space for a palette. */
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
406                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
407             break;
408     }
409
410     if (!b_info)
411         return E_OUTOFMEMORY;
412
413     /* Some applications access the surface in via DWORDs, and do not take
414      * the necessary care at the end of the surface. So we need at least
415      * 4 extra bytes at the end of the surface. Check against the page size,
416      * if the last page used for the surface has at least 4 spare bytes we're
417      * safe, otherwise add an extra line to the DIB section. */
418     GetSystemInfo(&sysInfo);
419     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
420     {
421         extraline = 1;
422         TRACE("Adding an extra line to the DIB section.\n");
423     }
424
425     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
426     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
427     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
428     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
429     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
430             * wined3d_surface_get_pitch(surface);
431     b_info->bmiHeader.biPlanes = 1;
432     b_info->bmiHeader.biBitCount = format->byte_count * 8;
433
434     b_info->bmiHeader.biXPelsPerMeter = 0;
435     b_info->bmiHeader.biYPelsPerMeter = 0;
436     b_info->bmiHeader.biClrUsed = 0;
437     b_info->bmiHeader.biClrImportant = 0;
438
439     /* Get the bit masks */
440     masks = (DWORD *)b_info->bmiColors;
441     switch (surface->resource.format->id)
442     {
443         case WINED3DFMT_B8G8R8_UNORM:
444             usage = DIB_RGB_COLORS;
445             b_info->bmiHeader.biCompression = BI_RGB;
446             break;
447
448         case WINED3DFMT_B5G5R5X1_UNORM:
449         case WINED3DFMT_B5G5R5A1_UNORM:
450         case WINED3DFMT_B4G4R4A4_UNORM:
451         case WINED3DFMT_B4G4R4X4_UNORM:
452         case WINED3DFMT_B2G3R3_UNORM:
453         case WINED3DFMT_B2G3R3A8_UNORM:
454         case WINED3DFMT_R10G10B10A2_UNORM:
455         case WINED3DFMT_R8G8B8A8_UNORM:
456         case WINED3DFMT_R8G8B8X8_UNORM:
457         case WINED3DFMT_B10G10R10A2_UNORM:
458         case WINED3DFMT_B5G6R5_UNORM:
459         case WINED3DFMT_R16G16B16A16_UNORM:
460             usage = 0;
461             b_info->bmiHeader.biCompression = BI_BITFIELDS;
462             masks[0] = format->red_mask;
463             masks[1] = format->green_mask;
464             masks[2] = format->blue_mask;
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             usage = 0;
471             break;
472     }
473
474     if (!(dc = GetDC(0)))
475     {
476         HeapFree(GetProcessHeap(), 0, b_info);
477         return HRESULT_FROM_WIN32(GetLastError());
478     }
479
480     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
481             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
482             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
483     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
484     ReleaseDC(0, dc);
485
486     if (!surface->dib.DIBsection)
487     {
488         ERR("Failed to create DIB section.\n");
489         HeapFree(GetProcessHeap(), 0, b_info);
490         return HRESULT_FROM_WIN32(GetLastError());
491     }
492
493     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
494     /* Copy the existing surface to the dib section. */
495     if (surface->resource.allocatedMemory)
496     {
497         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
498                 surface->resource.height * wined3d_surface_get_pitch(surface));
499     }
500     else
501     {
502         /* This is to make maps read the GL texture although memory is allocated. */
503         surface->flags &= ~SFLAG_INSYSMEM;
504     }
505     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
506
507     HeapFree(GetProcessHeap(), 0, b_info);
508
509     /* Now allocate a DC. */
510     surface->hDC = CreateCompatibleDC(0);
511     SelectObject(surface->hDC, surface->dib.DIBsection);
512     TRACE("Using wined3d palette %p.\n", surface->palette);
513     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
514
515     surface->flags |= SFLAG_DIBSECTION;
516
517     return WINED3D_OK;
518 }
519
520 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
521 {
522     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
523         return FALSE;
524     if (!(surface->flags & SFLAG_DYNLOCK))
525         return FALSE;
526     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
527         return FALSE;
528     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
529         return FALSE;
530
531     return TRUE;
532 }
533
534 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
535 {
536     struct wined3d_context *context;
537     GLenum error;
538
539     context = context_acquire(surface->resource.device, NULL);
540     ENTER_GL();
541
542     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
543     error = glGetError();
544     if (!surface->pbo || error != GL_NO_ERROR)
545         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
546
547     TRACE("Binding PBO %u.\n", surface->pbo);
548
549     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
550     checkGLcall("glBindBufferARB");
551
552     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
553             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
554     checkGLcall("glBufferDataARB");
555
556     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
557     checkGLcall("glBindBufferARB");
558
559     /* We don't need the system memory anymore and we can't even use it for PBOs. */
560     if (!(surface->flags & SFLAG_CLIENT))
561     {
562         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
563         surface->resource.heapMemory = NULL;
564     }
565     surface->resource.allocatedMemory = NULL;
566     surface->flags |= SFLAG_PBO;
567     LEAVE_GL();
568     context_release(context);
569 }
570
571 static void surface_prepare_system_memory(struct wined3d_surface *surface)
572 {
573     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
574
575     TRACE("surface %p.\n", surface);
576
577     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
578         surface_load_pbo(surface, gl_info);
579     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
580     {
581         /* Whatever surface we have, make sure that there is memory allocated
582          * for the downloaded copy, or a PBO to map. */
583         if (!surface->resource.heapMemory)
584             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
585
586         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
587                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
588
589         if (surface->flags & SFLAG_INSYSMEM)
590             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
591     }
592 }
593
594 static void surface_evict_sysmem(struct wined3d_surface *surface)
595 {
596     if (surface->flags & SFLAG_DONOTFREE)
597         return;
598
599     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
600     surface->resource.allocatedMemory = NULL;
601     surface->resource.heapMemory = NULL;
602     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
603 }
604
605 /* Context activation is done by the caller. */
606 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
607         struct wined3d_context *context, BOOL srgb)
608 {
609     struct wined3d_device *device = surface->resource.device;
610     DWORD active_sampler;
611
612     /* We don't need a specific texture unit, but after binding the texture
613      * the current unit is dirty. Read the unit back instead of switching to
614      * 0, this avoids messing around with the state manager's GL states. The
615      * current texture unit should always be a valid one.
616      *
617      * To be more specific, this is tricky because we can implicitly be
618      * called from sampler() in state.c. This means we can't touch anything
619      * other than whatever happens to be the currently active texture, or we
620      * would risk marking already applied sampler states dirty again. */
621     active_sampler = device->rev_tex_unit_map[context->active_texture];
622
623     if (active_sampler != WINED3D_UNMAPPED_STAGE)
624         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
625     surface_bind(surface, context, srgb);
626 }
627
628 static void surface_force_reload(struct wined3d_surface *surface)
629 {
630     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
631 }
632
633 static void surface_release_client_storage(struct wined3d_surface *surface)
634 {
635     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
636
637     ENTER_GL();
638     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
639     if (surface->texture_name)
640     {
641         surface_bind_and_dirtify(surface, context, FALSE);
642         glTexImage2D(surface->texture_target, surface->texture_level,
643                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
644     }
645     if (surface->texture_name_srgb)
646     {
647         surface_bind_and_dirtify(surface, context, TRUE);
648         glTexImage2D(surface->texture_target, surface->texture_level,
649                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
650     }
651     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
652     LEAVE_GL();
653
654     context_release(context);
655
656     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
657     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
658     surface_force_reload(surface);
659 }
660
661 static HRESULT surface_private_setup(struct wined3d_surface *surface)
662 {
663     /* TODO: Check against the maximum texture sizes supported by the video card. */
664     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
665     unsigned int pow2Width, pow2Height;
666
667     TRACE("surface %p.\n", surface);
668
669     surface->texture_name = 0;
670     surface->texture_target = GL_TEXTURE_2D;
671
672     /* Non-power2 support */
673     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
674     {
675         pow2Width = surface->resource.width;
676         pow2Height = surface->resource.height;
677     }
678     else
679     {
680         /* Find the nearest pow2 match */
681         pow2Width = pow2Height = 1;
682         while (pow2Width < surface->resource.width)
683             pow2Width <<= 1;
684         while (pow2Height < surface->resource.height)
685             pow2Height <<= 1;
686     }
687     surface->pow2Width = pow2Width;
688     surface->pow2Height = pow2Height;
689
690     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
691     {
692         /* TODO: Add support for non power two compressed textures. */
693         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
694         {
695             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
696                   surface, surface->resource.width, surface->resource.height);
697             return WINED3DERR_NOTAVAILABLE;
698         }
699     }
700
701     if (pow2Width != surface->resource.width
702             || pow2Height != surface->resource.height)
703     {
704         surface->flags |= SFLAG_NONPOW2;
705     }
706
707     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
708             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
709     {
710         /* One of three options:
711          * 1: Do the same as we do with NPOT and scale the texture, (any
712          *    texture ops would require the texture to be scaled which is
713          *    potentially slow)
714          * 2: Set the texture to the maximum size (bad idea).
715          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
716          * 4: Create the surface, but allow it to be used only for DirectDraw
717          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
718          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
719          *    the render target. */
720         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
721         {
722             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
723             return WINED3DERR_NOTAVAILABLE;
724         }
725
726         /* We should never use this surface in combination with OpenGL! */
727         TRACE("Creating an oversized surface: %ux%u.\n",
728                 surface->pow2Width, surface->pow2Height);
729     }
730     else
731     {
732         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
733          * and EXT_PALETTED_TEXTURE is used in combination with texture
734          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
735          * EXT_PALETTED_TEXTURE doesn't work in combination with
736          * ARB_TEXTURE_RECTANGLE. */
737         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
738                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
739                 && gl_info->supported[EXT_PALETTED_TEXTURE]
740                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
741         {
742             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
743             surface->pow2Width = surface->resource.width;
744             surface->pow2Height = surface->resource.height;
745             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
746         }
747     }
748
749     switch (wined3d_settings.offscreen_rendering_mode)
750     {
751         case ORM_FBO:
752             surface->get_drawable_size = get_drawable_size_fbo;
753             break;
754
755         case ORM_BACKBUFFER:
756             surface->get_drawable_size = get_drawable_size_backbuffer;
757             break;
758
759         default:
760             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
761             return WINED3DERR_INVALIDCALL;
762     }
763
764     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
765         surface->flags |= SFLAG_DISCARDED;
766
767     return WINED3D_OK;
768 }
769
770 static void surface_realize_palette(struct wined3d_surface *surface)
771 {
772     struct wined3d_palette *palette = surface->palette;
773
774     TRACE("surface %p.\n", surface);
775
776     if (!palette) return;
777
778     if (surface->resource.format->id == WINED3DFMT_P8_UINT
779             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
780     {
781         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
782         {
783             /* Make sure the texture is up to date. This call doesn't do
784              * anything if the texture is already up to date. */
785             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
786
787             /* We want to force a palette refresh, so mark the drawable as not being up to date */
788             if (!surface_is_offscreen(surface))
789                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
790         }
791         else
792         {
793             if (!(surface->flags & SFLAG_INSYSMEM))
794             {
795                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
796                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
797             }
798             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
799         }
800     }
801
802     if (surface->flags & SFLAG_DIBSECTION)
803     {
804         RGBQUAD col[256];
805         unsigned int i;
806
807         TRACE("Updating the DC's palette.\n");
808
809         for (i = 0; i < 256; ++i)
810         {
811             col[i].rgbRed   = palette->palents[i].peRed;
812             col[i].rgbGreen = palette->palents[i].peGreen;
813             col[i].rgbBlue  = palette->palents[i].peBlue;
814             col[i].rgbReserved = 0;
815         }
816         SetDIBColorTable(surface->hDC, 0, 256, col);
817     }
818
819     /* Propagate the changes to the drawable when we have a palette. */
820     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
821         surface_load_location(surface, surface->draw_binding, NULL);
822 }
823
824 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
825 {
826     HRESULT hr;
827
828     /* If there's no destination surface there is nothing to do. */
829     if (!surface->overlay_dest)
830         return WINED3D_OK;
831
832     /* Blt calls ModifyLocation on the dest surface, which in turn calls
833      * DrawOverlay to update the overlay. Prevent an endless recursion. */
834     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
835         return WINED3D_OK;
836
837     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
838     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
839             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
840     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
841
842     return hr;
843 }
844
845 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
846 {
847     struct wined3d_device *device = surface->resource.device;
848     const RECT *pass_rect = rect;
849
850     TRACE("surface %p, rect %s, flags %#x.\n",
851             surface, wine_dbgstr_rect(rect), flags);
852
853     if (flags & WINED3DLOCK_DISCARD)
854     {
855         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
856         surface_prepare_system_memory(surface);
857         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
858     }
859     else
860     {
861         /* surface_load_location() does not check if the rectangle specifies
862          * the full surface. Most callers don't need that, so do it here. */
863         if (rect && !rect->top && !rect->left
864                 && rect->right == surface->resource.width
865                 && rect->bottom == surface->resource.height)
866             pass_rect = NULL;
867         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
868     }
869
870     if (surface->flags & SFLAG_PBO)
871     {
872         const struct wined3d_gl_info *gl_info;
873         struct wined3d_context *context;
874
875         context = context_acquire(device, NULL);
876         gl_info = context->gl_info;
877
878         ENTER_GL();
879         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
880         checkGLcall("glBindBufferARB");
881
882         /* This shouldn't happen but could occur if some other function
883          * didn't handle the PBO properly. */
884         if (surface->resource.allocatedMemory)
885             ERR("The surface already has PBO memory allocated.\n");
886
887         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
888         checkGLcall("glMapBufferARB");
889
890         /* Make sure the PBO isn't set anymore in order not to break non-PBO
891          * calls. */
892         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
893         checkGLcall("glBindBufferARB");
894
895         LEAVE_GL();
896         context_release(context);
897     }
898
899     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
900     {
901         if (!rect)
902             surface_add_dirty_rect(surface, NULL);
903         else
904         {
905             struct wined3d_box b;
906
907             b.left = rect->left;
908             b.top = rect->top;
909             b.right = rect->right;
910             b.bottom = rect->bottom;
911             b.front = 0;
912             b.back = 1;
913             surface_add_dirty_rect(surface, &b);
914         }
915     }
916 }
917
918 static void surface_unmap(struct wined3d_surface *surface)
919 {
920     struct wined3d_device *device = surface->resource.device;
921     BOOL fullsurface;
922
923     TRACE("surface %p.\n", surface);
924
925     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
926
927     if (surface->flags & SFLAG_PBO)
928     {
929         const struct wined3d_gl_info *gl_info;
930         struct wined3d_context *context;
931
932         TRACE("Freeing PBO memory.\n");
933
934         context = context_acquire(device, NULL);
935         gl_info = context->gl_info;
936
937         ENTER_GL();
938         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
939         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
940         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
941         checkGLcall("glUnmapBufferARB");
942         LEAVE_GL();
943         context_release(context);
944
945         surface->resource.allocatedMemory = NULL;
946     }
947
948     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
949
950     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
951     {
952         TRACE("Not dirtified, nothing to do.\n");
953         goto done;
954     }
955
956     if ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
957             && surface->container.u.swapchain->front_buffer == surface)
958             || (wined3d_settings.offscreen_rendering_mode == ORM_BACKBUFFER
959             && device->fb.render_targets && surface == device->fb.render_targets[0]))
960     {
961         if (!surface->dirtyRect.left && !surface->dirtyRect.top
962                 && surface->dirtyRect.right == surface->resource.width
963                 && surface->dirtyRect.bottom == surface->resource.height)
964         {
965             fullsurface = TRUE;
966         }
967         else
968         {
969             /* TODO: Proper partial rectangle tracking. */
970             fullsurface = FALSE;
971             surface->flags |= SFLAG_INSYSMEM;
972         }
973
974         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
975
976         /* Partial rectangle tracking is not commonly implemented, it is only
977          * done for render targets. INSYSMEM was set before to tell
978          * surface_load_location() where to read the rectangle from.
979          * Indrawable is set because all modifications from the partial
980          * sysmem copy are written back to the drawable, thus the surface is
981          * merged again in the drawable. The sysmem copy is not fully up to
982          * date because only a subrectangle was read in Map(). */
983         if (!fullsurface)
984         {
985             surface_modify_location(surface, surface->draw_binding, TRUE);
986             surface_evict_sysmem(surface);
987         }
988
989         surface->dirtyRect.left = surface->resource.width;
990         surface->dirtyRect.top = surface->resource.height;
991         surface->dirtyRect.right = 0;
992         surface->dirtyRect.bottom = 0;
993     }
994     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
995     {
996         FIXME("Depth / stencil buffer locking is not implemented.\n");
997     }
998
999 done:
1000     /* Overlays have to be redrawn manually after changes with the GL implementation */
1001     if (surface->overlay_dest)
1002         surface_draw_overlay(surface);
1003 }
1004
1005 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1006 {
1007     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1008         return FALSE;
1009     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1010         return FALSE;
1011     return TRUE;
1012 }
1013
1014 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1015         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1016 {
1017     const struct wined3d_gl_info *gl_info;
1018     struct wined3d_context *context;
1019     DWORD src_mask, dst_mask;
1020     GLbitfield gl_mask;
1021
1022     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1023             device, src_surface, wine_dbgstr_rect(src_rect),
1024             dst_surface, wine_dbgstr_rect(dst_rect));
1025
1026     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1027     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1028
1029     if (src_mask != dst_mask)
1030     {
1031         ERR("Incompatible formats %s and %s.\n",
1032                 debug_d3dformat(src_surface->resource.format->id),
1033                 debug_d3dformat(dst_surface->resource.format->id));
1034         return;
1035     }
1036
1037     if (!src_mask)
1038     {
1039         ERR("Not a depth / stencil format: %s.\n",
1040                 debug_d3dformat(src_surface->resource.format->id));
1041         return;
1042     }
1043
1044     gl_mask = 0;
1045     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1046         gl_mask |= GL_DEPTH_BUFFER_BIT;
1047     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1048         gl_mask |= GL_STENCIL_BUFFER_BIT;
1049
1050     /* Make sure the locations are up-to-date. Loading the destination
1051      * surface isn't required if the entire surface is overwritten. */
1052     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1053     if (!surface_is_full_rect(dst_surface, dst_rect))
1054         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1055
1056     context = context_acquire(device, NULL);
1057     if (!context->valid)
1058     {
1059         context_release(context);
1060         WARN("Invalid context, skipping blit.\n");
1061         return;
1062     }
1063
1064     gl_info = context->gl_info;
1065
1066     ENTER_GL();
1067
1068     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1069     glReadBuffer(GL_NONE);
1070     checkGLcall("glReadBuffer()");
1071     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1072
1073     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1074     context_set_draw_buffer(context, GL_NONE);
1075     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1076     context_invalidate_state(context, STATE_FRAMEBUFFER);
1077
1078     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1079     {
1080         glDepthMask(GL_TRUE);
1081         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1082     }
1083     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1084     {
1085         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1086         {
1087             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1088             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1089         }
1090         glStencilMask(~0U);
1091         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1092     }
1093
1094     glDisable(GL_SCISSOR_TEST);
1095     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1096
1097     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1098             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1099     checkGLcall("glBlitFramebuffer()");
1100
1101     LEAVE_GL();
1102
1103     if (wined3d_settings.strict_draw_ordering)
1104         wglFlush(); /* Flush to ensure ordering across contexts. */
1105
1106     context_release(context);
1107 }
1108
1109 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1110  * Depth / stencil is not supported. */
1111 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1112         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1113         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1114 {
1115     const struct wined3d_gl_info *gl_info;
1116     struct wined3d_context *context;
1117     RECT src_rect, dst_rect;
1118     GLenum gl_filter;
1119     GLenum buffer;
1120
1121     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1122     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1123             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1124     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1125             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1126
1127     src_rect = *src_rect_in;
1128     dst_rect = *dst_rect_in;
1129
1130     switch (filter)
1131     {
1132         case WINED3D_TEXF_LINEAR:
1133             gl_filter = GL_LINEAR;
1134             break;
1135
1136         default:
1137             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1138         case WINED3D_TEXF_NONE:
1139         case WINED3D_TEXF_POINT:
1140             gl_filter = GL_NEAREST;
1141             break;
1142     }
1143
1144     /* Resolve the source surface first if needed. */
1145     if (src_location == SFLAG_INRB_MULTISAMPLE
1146             && (src_surface->resource.format->id != dst_surface->resource.format->id
1147                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1148                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1149         src_location = SFLAG_INRB_RESOLVED;
1150
1151     /* Make sure the locations are up-to-date. Loading the destination
1152      * surface isn't required if the entire surface is overwritten. (And is
1153      * in fact harmful if we're being called by surface_load_location() with
1154      * the purpose of loading the destination surface.) */
1155     surface_load_location(src_surface, src_location, NULL);
1156     if (!surface_is_full_rect(dst_surface, &dst_rect))
1157         surface_load_location(dst_surface, dst_location, NULL);
1158
1159     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1160     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1161     else context = context_acquire(device, NULL);
1162
1163     if (!context->valid)
1164     {
1165         context_release(context);
1166         WARN("Invalid context, skipping blit.\n");
1167         return;
1168     }
1169
1170     gl_info = context->gl_info;
1171
1172     if (src_location == SFLAG_INDRAWABLE)
1173     {
1174         TRACE("Source surface %p is onscreen.\n", src_surface);
1175         buffer = surface_get_gl_buffer(src_surface);
1176         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1177     }
1178     else
1179     {
1180         TRACE("Source surface %p is offscreen.\n", src_surface);
1181         buffer = GL_COLOR_ATTACHMENT0;
1182     }
1183
1184     ENTER_GL();
1185     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1186     glReadBuffer(buffer);
1187     checkGLcall("glReadBuffer()");
1188     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1189     LEAVE_GL();
1190
1191     if (dst_location == SFLAG_INDRAWABLE)
1192     {
1193         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1194         buffer = surface_get_gl_buffer(dst_surface);
1195         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1196     }
1197     else
1198     {
1199         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1200         buffer = GL_COLOR_ATTACHMENT0;
1201     }
1202
1203     ENTER_GL();
1204     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1205     context_set_draw_buffer(context, buffer);
1206     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1207     context_invalidate_state(context, STATE_FRAMEBUFFER);
1208
1209     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1210     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1211     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1212     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1213     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1214
1215     glDisable(GL_SCISSOR_TEST);
1216     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1217
1218     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1219             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1220     checkGLcall("glBlitFramebuffer()");
1221
1222     LEAVE_GL();
1223
1224     if (wined3d_settings.strict_draw_ordering
1225             || (dst_location == SFLAG_INDRAWABLE
1226             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1227         wglFlush();
1228
1229     context_release(context);
1230 }
1231
1232 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1233         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1234         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1235 {
1236     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1237         return FALSE;
1238
1239     /* Source and/or destination need to be on the GL side */
1240     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1241         return FALSE;
1242
1243     switch (blit_op)
1244     {
1245         case WINED3D_BLIT_OP_COLOR_BLIT:
1246             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1247                 return FALSE;
1248             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1249                 return FALSE;
1250             break;
1251
1252         case WINED3D_BLIT_OP_DEPTH_BLIT:
1253             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1254                 return FALSE;
1255             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1256                 return FALSE;
1257             break;
1258
1259         default:
1260             return FALSE;
1261     }
1262
1263     if (!(src_format->id == dst_format->id
1264             || (is_identity_fixup(src_format->color_fixup)
1265             && is_identity_fixup(dst_format->color_fixup))))
1266         return FALSE;
1267
1268     return TRUE;
1269 }
1270
1271 /* This function checks if the primary render target uses the 8bit paletted format. */
1272 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1273 {
1274     if (device->fb.render_targets && device->fb.render_targets[0])
1275     {
1276         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1277         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1278                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1279             return TRUE;
1280     }
1281     return FALSE;
1282 }
1283
1284 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1285         DWORD color, struct wined3d_color *float_color)
1286 {
1287     const struct wined3d_format *format = surface->resource.format;
1288     const struct wined3d_device *device = surface->resource.device;
1289
1290     switch (format->id)
1291     {
1292         case WINED3DFMT_P8_UINT:
1293             if (surface->palette)
1294             {
1295                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1296                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1297                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1298             }
1299             else
1300             {
1301                 float_color->r = 0.0f;
1302                 float_color->g = 0.0f;
1303                 float_color->b = 0.0f;
1304             }
1305             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1306             break;
1307
1308         case WINED3DFMT_B5G6R5_UNORM:
1309             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1310             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1311             float_color->b = (color & 0x1f) / 31.0f;
1312             float_color->a = 1.0f;
1313             break;
1314
1315         case WINED3DFMT_B8G8R8_UNORM:
1316         case WINED3DFMT_B8G8R8X8_UNORM:
1317             float_color->r = D3DCOLOR_R(color);
1318             float_color->g = D3DCOLOR_G(color);
1319             float_color->b = D3DCOLOR_B(color);
1320             float_color->a = 1.0f;
1321             break;
1322
1323         case WINED3DFMT_B8G8R8A8_UNORM:
1324             float_color->r = D3DCOLOR_R(color);
1325             float_color->g = D3DCOLOR_G(color);
1326             float_color->b = D3DCOLOR_B(color);
1327             float_color->a = D3DCOLOR_A(color);
1328             break;
1329
1330         default:
1331             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1332             return FALSE;
1333     }
1334
1335     return TRUE;
1336 }
1337
1338 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1339 {
1340     const struct wined3d_format *format = surface->resource.format;
1341
1342     switch (format->id)
1343     {
1344         case WINED3DFMT_S1_UINT_D15_UNORM:
1345             *float_depth = depth / (float)0x00007fff;
1346             break;
1347
1348         case WINED3DFMT_D16_UNORM:
1349             *float_depth = depth / (float)0x0000ffff;
1350             break;
1351
1352         case WINED3DFMT_D24_UNORM_S8_UINT:
1353         case WINED3DFMT_X8D24_UNORM:
1354             *float_depth = depth / (float)0x00ffffff;
1355             break;
1356
1357         case WINED3DFMT_D32_UNORM:
1358             *float_depth = depth / (float)0xffffffff;
1359             break;
1360
1361         default:
1362             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1363             return FALSE;
1364     }
1365
1366     return TRUE;
1367 }
1368
1369 /* Do not call while under the GL lock. */
1370 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1371 {
1372     const struct wined3d_resource *resource = &surface->resource;
1373     struct wined3d_device *device = resource->device;
1374     const struct blit_shader *blitter;
1375
1376     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1377             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1378     if (!blitter)
1379     {
1380         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1381         return WINED3DERR_INVALIDCALL;
1382     }
1383
1384     return blitter->depth_fill(device, surface, rect, depth);
1385 }
1386
1387 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1388         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1389 {
1390     struct wined3d_device *device = src_surface->resource.device;
1391
1392     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1393             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1394             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1395         return WINED3DERR_INVALIDCALL;
1396
1397     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1398
1399     surface_modify_ds_location(dst_surface, SFLAG_INTEXTURE,
1400             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1401
1402     return WINED3D_OK;
1403 }
1404
1405 /* Do not call while under the GL lock. */
1406 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1407         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1408         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1409 {
1410     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1411     struct wined3d_device *device = dst_surface->resource.device;
1412     DWORD src_ds_flags, dst_ds_flags;
1413     RECT src_rect, dst_rect;
1414     BOOL scale, convert;
1415
1416     static const DWORD simple_blit = WINEDDBLT_ASYNC
1417             | WINEDDBLT_COLORFILL
1418             | WINEDDBLT_WAIT
1419             | WINEDDBLT_DEPTHFILL
1420             | WINEDDBLT_DONOTWAIT;
1421
1422     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1423             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1424             flags, fx, debug_d3dtexturefiltertype(filter));
1425     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1426
1427     if (fx)
1428     {
1429         TRACE("dwSize %#x.\n", fx->dwSize);
1430         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1431         TRACE("dwROP %#x.\n", fx->dwROP);
1432         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1433         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1434         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1435         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1436         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1437         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1438         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1439         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1440         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1441         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1442         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1443         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1444         TRACE("dwReserved %#x.\n", fx->dwReserved);
1445         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1446         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1447         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1448         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1449         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1450         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1451                 fx->ddckDestColorkey.color_space_low_value,
1452                 fx->ddckDestColorkey.color_space_high_value);
1453         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1454                 fx->ddckSrcColorkey.color_space_low_value,
1455                 fx->ddckSrcColorkey.color_space_high_value);
1456     }
1457
1458     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1459     {
1460         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1461         return WINEDDERR_SURFACEBUSY;
1462     }
1463
1464     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1465
1466     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1467             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1468             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1469             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1470             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1471     {
1472         WARN("The application gave us a bad destination rectangle.\n");
1473         return WINEDDERR_INVALIDRECT;
1474     }
1475
1476     if (src_surface)
1477     {
1478         surface_get_rect(src_surface, src_rect_in, &src_rect);
1479
1480         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1481                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1482                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1483                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1484                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1485         {
1486             WARN("Application gave us bad source rectangle for Blt.\n");
1487             return WINEDDERR_INVALIDRECT;
1488         }
1489     }
1490     else
1491     {
1492         memset(&src_rect, 0, sizeof(src_rect));
1493     }
1494
1495     if (!fx || !(fx->dwDDFX))
1496         flags &= ~WINEDDBLT_DDFX;
1497
1498     if (flags & WINEDDBLT_WAIT)
1499         flags &= ~WINEDDBLT_WAIT;
1500
1501     if (flags & WINEDDBLT_ASYNC)
1502     {
1503         static unsigned int once;
1504
1505         if (!once++)
1506             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1507         flags &= ~WINEDDBLT_ASYNC;
1508     }
1509
1510     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1511     if (flags & WINEDDBLT_DONOTWAIT)
1512     {
1513         static unsigned int once;
1514
1515         if (!once++)
1516             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1517         flags &= ~WINEDDBLT_DONOTWAIT;
1518     }
1519
1520     if (!device->d3d_initialized)
1521     {
1522         WARN("D3D not initialized, using fallback.\n");
1523         goto cpu;
1524     }
1525
1526     /* We want to avoid invalidating the sysmem location for converted
1527      * surfaces, since otherwise we'd have to convert the data back when
1528      * locking them. */
1529     if (dst_surface->flags & SFLAG_CONVERTED)
1530     {
1531         WARN("Converted surface, using CPU blit.\n");
1532         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1533     }
1534
1535     if (flags & ~simple_blit)
1536     {
1537         WARN("Using fallback for complex blit (%#x).\n", flags);
1538         goto fallback;
1539     }
1540
1541     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1542         src_swapchain = src_surface->container.u.swapchain;
1543     else
1544         src_swapchain = NULL;
1545
1546     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1547         dst_swapchain = dst_surface->container.u.swapchain;
1548     else
1549         dst_swapchain = NULL;
1550
1551     /* This isn't strictly needed. FBO blits for example could deal with
1552      * cross-swapchain blits by first downloading the source to a texture
1553      * before switching to the destination context. We just have this here to
1554      * not have to deal with the issue, since cross-swapchain blits should be
1555      * rare. */
1556     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1557     {
1558         FIXME("Using fallback for cross-swapchain blit.\n");
1559         goto fallback;
1560     }
1561
1562     scale = src_surface
1563             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1564             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1565     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1566
1567     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1568     if (src_surface)
1569         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1570     else
1571         src_ds_flags = 0;
1572
1573     if (src_ds_flags || dst_ds_flags)
1574     {
1575         if (flags & WINEDDBLT_DEPTHFILL)
1576         {
1577             float depth;
1578
1579             TRACE("Depth fill.\n");
1580
1581             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1582                 return WINED3DERR_INVALIDCALL;
1583
1584             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1585                 return WINED3D_OK;
1586         }
1587         else
1588         {
1589             if (src_ds_flags != dst_ds_flags)
1590             {
1591                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1592                 return WINED3DERR_INVALIDCALL;
1593             }
1594
1595             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1596                 return WINED3D_OK;
1597         }
1598     }
1599     else
1600     {
1601         /* In principle this would apply to depth blits as well, but we don't
1602          * implement those in the CPU blitter at the moment. */
1603         if ((dst_surface->flags & SFLAG_INSYSMEM)
1604                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1605         {
1606             if (scale)
1607                 TRACE("Not doing sysmem blit because of scaling.\n");
1608             else if (convert)
1609                 TRACE("Not doing sysmem blit because of format conversion.\n");
1610             else
1611                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1612         }
1613
1614         if (flags & WINEDDBLT_COLORFILL)
1615         {
1616             struct wined3d_color color;
1617
1618             TRACE("Color fill.\n");
1619
1620             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1621                 goto fallback;
1622
1623             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1624                 return WINED3D_OK;
1625         }
1626         else
1627         {
1628             TRACE("Color blit.\n");
1629
1630             /* Upload */
1631             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1632             {
1633                 if (scale)
1634                     TRACE("Not doing upload because of scaling.\n");
1635                 else if (convert)
1636                     TRACE("Not doing upload because of format conversion.\n");
1637                 else
1638                 {
1639                     POINT dst_point = {dst_rect.left, dst_rect.top};
1640
1641                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1642                     {
1643                         if (!surface_is_offscreen(dst_surface))
1644                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1645                         return WINED3D_OK;
1646                     }
1647                 }
1648             }
1649
1650             /* Use present for back -> front blits. The idea behind this is
1651              * that present is potentially faster than a blit, in particular
1652              * when FBO blits aren't available. Some ddraw applications like
1653              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1654              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1655              * applications can't blit directly to the frontbuffer. */
1656             if (dst_swapchain && dst_swapchain->back_buffers
1657                     && dst_surface == dst_swapchain->front_buffer
1658                     && src_surface == dst_swapchain->back_buffers[0])
1659             {
1660                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1661
1662                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1663
1664                 /* Set the swap effect to COPY, we don't want the backbuffer
1665                  * to become undefined. */
1666                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1667                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1668                 dst_swapchain->desc.swap_effect = swap_effect;
1669
1670                 return WINED3D_OK;
1671             }
1672
1673             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1674                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1675                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1676             {
1677                 TRACE("Using FBO blit.\n");
1678
1679                 surface_blt_fbo(device, filter,
1680                         src_surface, src_surface->draw_binding, &src_rect,
1681                         dst_surface, dst_surface->draw_binding, &dst_rect);
1682                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1683                 return WINED3D_OK;
1684             }
1685
1686             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1687                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1688                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1689             {
1690                 TRACE("Using arbfp blit.\n");
1691
1692                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1693                     return WINED3D_OK;
1694             }
1695         }
1696     }
1697
1698 fallback:
1699
1700     /* Special cases for render targets. */
1701     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1702             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1703     {
1704         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1705                 src_surface, &src_rect, flags, fx, filter)))
1706             return WINED3D_OK;
1707     }
1708
1709 cpu:
1710
1711     /* For the rest call the X11 surface implementation. For render targets
1712      * this should be implemented OpenGL accelerated in BltOverride, other
1713      * blits are rather rare. */
1714     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1715 }
1716
1717 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1718         struct wined3d_surface *render_target)
1719 {
1720     TRACE("surface %p, render_target %p.\n", surface, render_target);
1721
1722     /* TODO: Check surface sizes, pools, etc. */
1723
1724     if (render_target->resource.multisample_type)
1725         return WINED3DERR_INVALIDCALL;
1726
1727     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1728 }
1729
1730 /* Context activation is done by the caller. */
1731 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1732 {
1733     if (surface->flags & SFLAG_DIBSECTION)
1734     {
1735         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1736     }
1737     else
1738     {
1739         if (!surface->resource.heapMemory)
1740             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1741         else if (!(surface->flags & SFLAG_CLIENT))
1742             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1743                     surface, surface->resource.heapMemory, surface->flags);
1744
1745         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1746                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1747     }
1748
1749     ENTER_GL();
1750     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1751     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1752     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1753             surface->resource.size, surface->resource.allocatedMemory));
1754     checkGLcall("glGetBufferSubDataARB");
1755     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1756     checkGLcall("glDeleteBuffersARB");
1757     LEAVE_GL();
1758
1759     surface->pbo = 0;
1760     surface->flags &= ~SFLAG_PBO;
1761 }
1762
1763 /* Do not call while under the GL lock. */
1764 static void surface_unload(struct wined3d_resource *resource)
1765 {
1766     struct wined3d_surface *surface = surface_from_resource(resource);
1767     struct wined3d_renderbuffer_entry *entry, *entry2;
1768     struct wined3d_device *device = resource->device;
1769     const struct wined3d_gl_info *gl_info;
1770     struct wined3d_context *context;
1771
1772     TRACE("surface %p.\n", surface);
1773
1774     if (resource->pool == WINED3D_POOL_DEFAULT)
1775     {
1776         /* Default pool resources are supposed to be destroyed before Reset is called.
1777          * Implicit resources stay however. So this means we have an implicit render target
1778          * or depth stencil. The content may be destroyed, but we still have to tear down
1779          * opengl resources, so we cannot leave early.
1780          *
1781          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1782          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1783          * or the depth stencil into an FBO the texture or render buffer will be removed
1784          * and all flags get lost
1785          */
1786         if (!(surface->flags & SFLAG_PBO))
1787             surface_init_sysmem(surface);
1788         /* We also get here when the ddraw swapchain is destroyed, for example
1789          * for a mode switch. In this case this surface won't necessarily be
1790          * an implicit surface. We have to mark it lost so that the
1791          * application can restore it after the mode switch. */
1792         surface->flags |= SFLAG_LOST;
1793     }
1794     else
1795     {
1796         /* Load the surface into system memory */
1797         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1798         surface_modify_location(surface, surface->draw_binding, FALSE);
1799     }
1800     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1801     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1802     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1803
1804     context = context_acquire(device, NULL);
1805     gl_info = context->gl_info;
1806
1807     /* Destroy PBOs, but load them into real sysmem before */
1808     if (surface->flags & SFLAG_PBO)
1809         surface_remove_pbo(surface, gl_info);
1810
1811     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1812      * all application-created targets the application has to release the surface
1813      * before calling _Reset
1814      */
1815     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1816     {
1817         ENTER_GL();
1818         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1819         LEAVE_GL();
1820         list_remove(&entry->entry);
1821         HeapFree(GetProcessHeap(), 0, entry);
1822     }
1823     list_init(&surface->renderbuffers);
1824     surface->current_renderbuffer = NULL;
1825
1826     ENTER_GL();
1827
1828     /* If we're in a texture, the texture name belongs to the texture.
1829      * Otherwise, destroy it. */
1830     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1831     {
1832         glDeleteTextures(1, &surface->texture_name);
1833         surface->texture_name = 0;
1834         glDeleteTextures(1, &surface->texture_name_srgb);
1835         surface->texture_name_srgb = 0;
1836     }
1837     if (surface->rb_multisample)
1838     {
1839         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1840         surface->rb_multisample = 0;
1841     }
1842     if (surface->rb_resolved)
1843     {
1844         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1845         surface->rb_resolved = 0;
1846     }
1847
1848     LEAVE_GL();
1849
1850     context_release(context);
1851
1852     resource_unload(resource);
1853 }
1854
1855 static const struct wined3d_resource_ops surface_resource_ops =
1856 {
1857     surface_unload,
1858 };
1859
1860 static const struct wined3d_surface_ops surface_ops =
1861 {
1862     surface_private_setup,
1863     surface_realize_palette,
1864     surface_map,
1865     surface_unmap,
1866 };
1867
1868 /*****************************************************************************
1869  * Initializes the GDI surface, aka creates the DIB section we render to
1870  * The DIB section creation is done by calling GetDC, which will create the
1871  * section and releasing the dc to allow the app to use it. The dib section
1872  * will stay until the surface is released
1873  *
1874  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1875  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1876  * avoid confusion in the shared surface code.
1877  *
1878  * Returns:
1879  *  WINED3D_OK on success
1880  *  The return values of called methods on failure
1881  *
1882  *****************************************************************************/
1883 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1884 {
1885     HRESULT hr;
1886
1887     TRACE("surface %p.\n", surface);
1888
1889     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1890     {
1891         ERR("Overlays not yet supported by GDI surfaces.\n");
1892         return WINED3DERR_INVALIDCALL;
1893     }
1894
1895     /* Sysmem textures have memory already allocated - release it,
1896      * this avoids an unnecessary memcpy. */
1897     hr = surface_create_dib_section(surface);
1898     if (SUCCEEDED(hr))
1899     {
1900         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1901         surface->resource.heapMemory = NULL;
1902         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1903     }
1904
1905     /* We don't mind the nonpow2 stuff in GDI. */
1906     surface->pow2Width = surface->resource.width;
1907     surface->pow2Height = surface->resource.height;
1908
1909     return WINED3D_OK;
1910 }
1911
1912 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1913 {
1914     struct wined3d_palette *palette = surface->palette;
1915
1916     TRACE("surface %p.\n", surface);
1917
1918     if (!palette) return;
1919
1920     if (surface->flags & SFLAG_DIBSECTION)
1921     {
1922         RGBQUAD col[256];
1923         unsigned int i;
1924
1925         TRACE("Updating the DC's palette.\n");
1926
1927         for (i = 0; i < 256; ++i)
1928         {
1929             col[i].rgbRed = palette->palents[i].peRed;
1930             col[i].rgbGreen = palette->palents[i].peGreen;
1931             col[i].rgbBlue = palette->palents[i].peBlue;
1932             col[i].rgbReserved = 0;
1933         }
1934         SetDIBColorTable(surface->hDC, 0, 256, col);
1935     }
1936
1937     /* Update the image because of the palette change. Some games like e.g.
1938      * Red Alert call SetEntries a lot to implement fading. */
1939     /* Tell the swapchain to update the screen. */
1940     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1941     {
1942         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1943         if (surface == swapchain->front_buffer)
1944         {
1945             x11_copy_to_screen(swapchain, NULL);
1946         }
1947     }
1948 }
1949
1950 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1951 {
1952     TRACE("surface %p, rect %s, flags %#x.\n",
1953             surface, wine_dbgstr_rect(rect), flags);
1954
1955     if (!(surface->flags & SFLAG_DIBSECTION))
1956     {
1957         HRESULT hr;
1958
1959         /* This happens on gdi surfaces if the application set a user pointer
1960          * and resets it. Recreate the DIB section. */
1961         if (FAILED(hr = surface_create_dib_section(surface)))
1962         {
1963             ERR("Failed to create dib section, hr %#x.\n", hr);
1964             return;
1965         }
1966         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1967         surface->resource.heapMemory = NULL;
1968         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1969     }
1970 }
1971
1972 static void gdi_surface_unmap(struct wined3d_surface *surface)
1973 {
1974     TRACE("surface %p.\n", surface);
1975
1976     /* Tell the swapchain to update the screen. */
1977     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1978     {
1979         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1980         if (surface == swapchain->front_buffer)
1981         {
1982             x11_copy_to_screen(swapchain, &surface->lockedRect);
1983         }
1984     }
1985
1986     memset(&surface->lockedRect, 0, sizeof(RECT));
1987 }
1988
1989 static const struct wined3d_surface_ops gdi_surface_ops =
1990 {
1991     gdi_surface_private_setup,
1992     gdi_surface_realize_palette,
1993     gdi_surface_map,
1994     gdi_surface_unmap,
1995 };
1996
1997 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
1998 {
1999     GLuint *name;
2000     DWORD flag;
2001
2002     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2003
2004     if(srgb)
2005     {
2006         name = &surface->texture_name_srgb;
2007         flag = SFLAG_INSRGBTEX;
2008     }
2009     else
2010     {
2011         name = &surface->texture_name;
2012         flag = SFLAG_INTEXTURE;
2013     }
2014
2015     if (!*name && new_name)
2016     {
2017         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2018          * surface has no texture name yet. See if we can get rid of this. */
2019         if (surface->flags & flag)
2020         {
2021             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2022             surface_modify_location(surface, flag, FALSE);
2023         }
2024     }
2025
2026     *name = new_name;
2027     surface_force_reload(surface);
2028 }
2029
2030 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2031 {
2032     TRACE("surface %p, target %#x.\n", surface, target);
2033
2034     if (surface->texture_target != target)
2035     {
2036         if (target == GL_TEXTURE_RECTANGLE_ARB)
2037         {
2038             surface->flags &= ~SFLAG_NORMCOORD;
2039         }
2040         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2041         {
2042             surface->flags |= SFLAG_NORMCOORD;
2043         }
2044     }
2045     surface->texture_target = target;
2046     surface_force_reload(surface);
2047 }
2048
2049 /* Context activation is done by the caller. */
2050 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2051 {
2052     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2053
2054     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2055     {
2056         struct wined3d_texture *texture = surface->container.u.texture;
2057
2058         TRACE("Passing to container (%p).\n", texture);
2059         texture->texture_ops->texture_bind(texture, context, srgb);
2060     }
2061     else
2062     {
2063         if (surface->texture_level)
2064         {
2065             ERR("Standalone surface %p is non-zero texture level %u.\n",
2066                     surface, surface->texture_level);
2067         }
2068
2069         if (srgb)
2070             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2071
2072         ENTER_GL();
2073
2074         if (!surface->texture_name)
2075         {
2076             glGenTextures(1, &surface->texture_name);
2077             checkGLcall("glGenTextures");
2078
2079             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2080
2081             context_bind_texture(context, surface->texture_target, surface->texture_name);
2082             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2083             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2084             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2085             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2086             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2087             checkGLcall("glTexParameteri");
2088         }
2089         else
2090         {
2091             context_bind_texture(context, surface->texture_target, surface->texture_name);
2092         }
2093
2094         LEAVE_GL();
2095     }
2096 }
2097
2098 /* This call just downloads data, the caller is responsible for binding the
2099  * correct texture. */
2100 /* Context activation is done by the caller. */
2101 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2102 {
2103     const struct wined3d_format *format = surface->resource.format;
2104
2105     /* Only support read back of converted P8 surfaces. */
2106     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2107     {
2108         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2109         return;
2110     }
2111
2112     ENTER_GL();
2113
2114     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2115     {
2116         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2117                 surface, surface->texture_level, format->glFormat, format->glType,
2118                 surface->resource.allocatedMemory);
2119
2120         if (surface->flags & SFLAG_PBO)
2121         {
2122             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2123             checkGLcall("glBindBufferARB");
2124             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2125             checkGLcall("glGetCompressedTexImageARB");
2126             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2127             checkGLcall("glBindBufferARB");
2128         }
2129         else
2130         {
2131             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2132                     surface->texture_level, surface->resource.allocatedMemory));
2133             checkGLcall("glGetCompressedTexImageARB");
2134         }
2135
2136         LEAVE_GL();
2137     }
2138     else
2139     {
2140         void *mem;
2141         GLenum gl_format = format->glFormat;
2142         GLenum gl_type = format->glType;
2143         int src_pitch = 0;
2144         int dst_pitch = 0;
2145
2146         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2147         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2148         {
2149             gl_format = GL_ALPHA;
2150             gl_type = GL_UNSIGNED_BYTE;
2151         }
2152
2153         if (surface->flags & SFLAG_NONPOW2)
2154         {
2155             unsigned char alignment = surface->resource.device->surface_alignment;
2156             src_pitch = format->byte_count * surface->pow2Width;
2157             dst_pitch = wined3d_surface_get_pitch(surface);
2158             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2159             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2160         }
2161         else
2162         {
2163             mem = surface->resource.allocatedMemory;
2164         }
2165
2166         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2167                 surface, surface->texture_level, gl_format, gl_type, mem);
2168
2169         if (surface->flags & SFLAG_PBO)
2170         {
2171             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2172             checkGLcall("glBindBufferARB");
2173
2174             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2175             checkGLcall("glGetTexImage");
2176
2177             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2178             checkGLcall("glBindBufferARB");
2179         }
2180         else
2181         {
2182             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2183             checkGLcall("glGetTexImage");
2184         }
2185         LEAVE_GL();
2186
2187         if (surface->flags & SFLAG_NONPOW2)
2188         {
2189             const BYTE *src_data;
2190             BYTE *dst_data;
2191             UINT y;
2192             /*
2193              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2194              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2195              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2196              *
2197              * We're doing this...
2198              *
2199              * instead of boxing the texture :
2200              * |<-texture width ->|  -->pow2width|   /\
2201              * |111111111111111111|              |   |
2202              * |222 Texture 222222| boxed empty  | texture height
2203              * |3333 Data 33333333|              |   |
2204              * |444444444444444444|              |   \/
2205              * -----------------------------------   |
2206              * |     boxed  empty | boxed empty  | pow2height
2207              * |                  |              |   \/
2208              * -----------------------------------
2209              *
2210              *
2211              * we're repacking the data to the expected texture width
2212              *
2213              * |<-texture width ->|  -->pow2width|   /\
2214              * |111111111111111111222222222222222|   |
2215              * |222333333333333333333444444444444| texture height
2216              * |444444                           |   |
2217              * |                                 |   \/
2218              * |                                 |   |
2219              * |            empty                | pow2height
2220              * |                                 |   \/
2221              * -----------------------------------
2222              *
2223              * == is the same as
2224              *
2225              * |<-texture width ->|    /\
2226              * |111111111111111111|
2227              * |222222222222222222|texture height
2228              * |333333333333333333|
2229              * |444444444444444444|    \/
2230              * --------------------
2231              *
2232              * this also means that any references to allocatedMemory should work with the data as if were a
2233              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2234              *
2235              * internally the texture is still stored in a boxed format so any references to textureName will
2236              * get a boxed texture with width pow2width and not a texture of width resource.width.
2237              *
2238              * Performance should not be an issue, because applications normally do not lock the surfaces when
2239              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2240              * and doesn't have to be re-read. */
2241             src_data = mem;
2242             dst_data = surface->resource.allocatedMemory;
2243             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2244             for (y = 1; y < surface->resource.height; ++y)
2245             {
2246                 /* skip the first row */
2247                 src_data += src_pitch;
2248                 dst_data += dst_pitch;
2249                 memcpy(dst_data, src_data, dst_pitch);
2250             }
2251
2252             HeapFree(GetProcessHeap(), 0, mem);
2253         }
2254     }
2255
2256     /* Surface has now been downloaded */
2257     surface->flags |= SFLAG_INSYSMEM;
2258 }
2259
2260 /* This call just uploads data, the caller is responsible for binding the
2261  * correct texture. */
2262 /* Context activation is done by the caller. */
2263 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2264         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2265         BOOL srgb, const struct wined3d_bo_address *data)
2266 {
2267     UINT update_w = src_rect->right - src_rect->left;
2268     UINT update_h = src_rect->bottom - src_rect->top;
2269
2270     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2271             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2272             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2273
2274     if (surface->flags & SFLAG_LOCKED)
2275     {
2276         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2277         surface->flags |= SFLAG_PIN_SYSMEM;
2278     }
2279
2280     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2281     {
2282         update_h *= format->height_scale.numerator;
2283         update_h /= format->height_scale.denominator;
2284     }
2285
2286     ENTER_GL();
2287
2288     if (data->buffer_object)
2289     {
2290         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2291         checkGLcall("glBindBufferARB");
2292     }
2293
2294     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2295     {
2296         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2297         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2298         const BYTE *addr = data->addr;
2299         GLenum internal;
2300
2301         addr += (src_rect->top / format->block_height) * src_pitch;
2302         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2303
2304         if (srgb)
2305             internal = format->glGammaInternal;
2306         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2307             internal = format->rtInternal;
2308         else
2309             internal = format->glInternal;
2310
2311         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2312                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2313                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2314
2315         if (row_length == src_pitch)
2316         {
2317             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2318                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2319         }
2320         else
2321         {
2322             UINT row, y;
2323
2324             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2325              * can't use the unpack row length like below. */
2326             for (row = 0, y = dst_point->y; row < row_count; ++row)
2327             {
2328                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2329                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2330                 y += format->block_height;
2331                 addr += src_pitch;
2332             }
2333         }
2334         checkGLcall("glCompressedTexSubImage2DARB");
2335     }
2336     else
2337     {
2338         const BYTE *addr = data->addr;
2339
2340         addr += src_rect->top * src_pitch;
2341         addr += src_rect->left * format->byte_count;
2342
2343         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2344                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2345                 update_w, update_h, format->glFormat, format->glType, addr);
2346
2347         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2348         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2349                 update_w, update_h, format->glFormat, format->glType, addr);
2350         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2351         checkGLcall("glTexSubImage2D");
2352     }
2353
2354     if (data->buffer_object)
2355     {
2356         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2357         checkGLcall("glBindBufferARB");
2358     }
2359
2360     LEAVE_GL();
2361
2362     if (wined3d_settings.strict_draw_ordering)
2363         wglFlush();
2364
2365     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2366     {
2367         struct wined3d_device *device = surface->resource.device;
2368         unsigned int i;
2369
2370         for (i = 0; i < device->context_count; ++i)
2371         {
2372             context_surface_update(device->contexts[i], surface);
2373         }
2374     }
2375 }
2376
2377 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2378         struct wined3d_surface *src_surface, const RECT *src_rect)
2379 {
2380     const struct wined3d_format *src_format;
2381     const struct wined3d_format *dst_format;
2382     const struct wined3d_gl_info *gl_info;
2383     enum wined3d_conversion_type convert;
2384     struct wined3d_context *context;
2385     struct wined3d_bo_address data;
2386     struct wined3d_format format;
2387     UINT update_w, update_h;
2388     UINT dst_w, dst_h;
2389     UINT src_w, src_h;
2390     UINT src_pitch;
2391     POINT p;
2392     RECT r;
2393
2394     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2395             dst_surface, wine_dbgstr_point(dst_point),
2396             src_surface, wine_dbgstr_rect(src_rect));
2397
2398     src_format = src_surface->resource.format;
2399     dst_format = dst_surface->resource.format;
2400
2401     if (src_format->id != dst_format->id)
2402     {
2403         WARN("Source and destination surfaces should have the same format.\n");
2404         return WINED3DERR_INVALIDCALL;
2405     }
2406
2407     if (!dst_point)
2408     {
2409         p.x = 0;
2410         p.y = 0;
2411         dst_point = &p;
2412     }
2413     else if (dst_point->x < 0 || dst_point->y < 0)
2414     {
2415         WARN("Invalid destination point.\n");
2416         return WINED3DERR_INVALIDCALL;
2417     }
2418
2419     if (!src_rect)
2420     {
2421         r.left = 0;
2422         r.top = 0;
2423         r.right = src_surface->resource.width;
2424         r.bottom = src_surface->resource.height;
2425         src_rect = &r;
2426     }
2427     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2428             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2429     {
2430         WARN("Invalid source rectangle.\n");
2431         return WINED3DERR_INVALIDCALL;
2432     }
2433
2434     src_w = src_surface->resource.width;
2435     src_h = src_surface->resource.height;
2436
2437     dst_w = dst_surface->resource.width;
2438     dst_h = dst_surface->resource.height;
2439
2440     update_w = src_rect->right - src_rect->left;
2441     update_h = src_rect->bottom - src_rect->top;
2442
2443     if (update_w > dst_w || dst_point->x > dst_w - update_w
2444             || update_h > dst_h || dst_point->y > dst_h - update_h)
2445     {
2446         WARN("Destination out of bounds.\n");
2447         return WINED3DERR_INVALIDCALL;
2448     }
2449
2450     /* NPOT block sizes would be silly. */
2451     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2452             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2453             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2454     {
2455         WARN("Update rect not block-aligned.\n");
2456         return WINED3DERR_INVALIDCALL;
2457     }
2458
2459     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2460     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2461     if (convert != WINED3D_CT_NONE || format.convert)
2462     {
2463         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2464         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2465     }
2466
2467     context = context_acquire(dst_surface->resource.device, NULL);
2468     gl_info = context->gl_info;
2469
2470     /* Only load the surface for partial updates. For newly allocated texture
2471      * the texture wouldn't be the current location, and we'd upload zeroes
2472      * just to overwrite them again. */
2473     if (update_w == dst_w && update_h == dst_h)
2474         surface_prepare_texture(dst_surface, context, FALSE);
2475     else
2476         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2477     surface_bind(dst_surface, context, FALSE);
2478
2479     data.buffer_object = src_surface->pbo;
2480     data.addr = src_surface->resource.allocatedMemory;
2481     src_pitch = wined3d_surface_get_pitch(src_surface);
2482
2483     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2484
2485     invalidate_active_texture(dst_surface->resource.device, context);
2486
2487     context_release(context);
2488
2489     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2490     return WINED3D_OK;
2491 }
2492
2493 /* This call just allocates the texture, the caller is responsible for binding
2494  * the correct texture. */
2495 /* Context activation is done by the caller. */
2496 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2497         const struct wined3d_format *format, BOOL srgb)
2498 {
2499     BOOL enable_client_storage = FALSE;
2500     GLsizei width = surface->pow2Width;
2501     GLsizei height = surface->pow2Height;
2502     const BYTE *mem = NULL;
2503     GLenum internal;
2504
2505     if (srgb)
2506     {
2507         internal = format->glGammaInternal;
2508     }
2509     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2510     {
2511         internal = format->rtInternal;
2512     }
2513     else
2514     {
2515         internal = format->glInternal;
2516     }
2517
2518     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2519     {
2520         height *= format->height_scale.numerator;
2521         height /= format->height_scale.denominator;
2522     }
2523
2524     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2525             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2526             internal, width, height, format->glFormat, format->glType);
2527
2528     ENTER_GL();
2529
2530     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2531     {
2532         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2533                 || !surface->resource.allocatedMemory)
2534         {
2535             /* In some cases we want to disable client storage.
2536              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2537              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2538              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2539              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2540              */
2541             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2542             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2543             surface->flags &= ~SFLAG_CLIENT;
2544             enable_client_storage = TRUE;
2545         }
2546         else
2547         {
2548             surface->flags |= SFLAG_CLIENT;
2549
2550             /* Point OpenGL to our allocated texture memory. Do not use
2551              * resource.allocatedMemory here because it might point into a
2552              * PBO. Instead use heapMemory, but get the alignment right. */
2553             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2554                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2555         }
2556     }
2557
2558     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2559     {
2560         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2561                 internal, width, height, 0, surface->resource.size, mem));
2562         checkGLcall("glCompressedTexImage2DARB");
2563     }
2564     else
2565     {
2566         glTexImage2D(surface->texture_target, surface->texture_level,
2567                 internal, width, height, 0, format->glFormat, format->glType, mem);
2568         checkGLcall("glTexImage2D");
2569     }
2570
2571     if(enable_client_storage) {
2572         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2573         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2574     }
2575     LEAVE_GL();
2576 }
2577
2578 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2579  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2580 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2581 /* GL locking is done by the caller */
2582 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2583 {
2584     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2585     struct wined3d_renderbuffer_entry *entry;
2586     GLuint renderbuffer = 0;
2587     unsigned int src_width, src_height;
2588     unsigned int width, height;
2589
2590     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2591     {
2592         width = rt->pow2Width;
2593         height = rt->pow2Height;
2594     }
2595     else
2596     {
2597         width = surface->pow2Width;
2598         height = surface->pow2Height;
2599     }
2600
2601     src_width = surface->pow2Width;
2602     src_height = surface->pow2Height;
2603
2604     /* A depth stencil smaller than the render target is not valid */
2605     if (width > src_width || height > src_height) return;
2606
2607     /* Remove any renderbuffer set if the sizes match */
2608     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2609             || (width == src_width && height == src_height))
2610     {
2611         surface->current_renderbuffer = NULL;
2612         return;
2613     }
2614
2615     /* Look if we've already got a renderbuffer of the correct dimensions */
2616     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2617     {
2618         if (entry->width == width && entry->height == height)
2619         {
2620             renderbuffer = entry->id;
2621             surface->current_renderbuffer = entry;
2622             break;
2623         }
2624     }
2625
2626     if (!renderbuffer)
2627     {
2628         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2629         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2630         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2631                 surface->resource.format->glInternal, width, height);
2632
2633         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2634         entry->width = width;
2635         entry->height = height;
2636         entry->id = renderbuffer;
2637         list_add_head(&surface->renderbuffers, &entry->entry);
2638
2639         surface->current_renderbuffer = entry;
2640     }
2641
2642     checkGLcall("set_compatible_renderbuffer");
2643 }
2644
2645 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2646 {
2647     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2648
2649     TRACE("surface %p.\n", surface);
2650
2651     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2652     {
2653         ERR("Surface %p is not on a swapchain.\n", surface);
2654         return GL_NONE;
2655     }
2656
2657     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2658     {
2659         if (swapchain->render_to_fbo)
2660         {
2661             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2662             return GL_COLOR_ATTACHMENT0;
2663         }
2664         TRACE("Returning GL_BACK\n");
2665         return GL_BACK;
2666     }
2667     else if (surface == swapchain->front_buffer)
2668     {
2669         TRACE("Returning GL_FRONT\n");
2670         return GL_FRONT;
2671     }
2672
2673     FIXME("Higher back buffer, returning GL_BACK\n");
2674     return GL_BACK;
2675 }
2676
2677 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2678 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2679 {
2680     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2681
2682     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2683         /* No partial locking for textures yet. */
2684         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2685
2686     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2687     if (dirty_rect)
2688     {
2689         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2690         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2691         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2692         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2693     }
2694     else
2695     {
2696         surface->dirtyRect.left = 0;
2697         surface->dirtyRect.top = 0;
2698         surface->dirtyRect.right = surface->resource.width;
2699         surface->dirtyRect.bottom = surface->resource.height;
2700     }
2701
2702     /* if the container is a texture then mark it dirty. */
2703     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2704     {
2705         TRACE("Passing to container.\n");
2706         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2707     }
2708 }
2709
2710 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2711 {
2712     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2713     BOOL ck_changed;
2714
2715     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2716
2717     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2718     {
2719         ERR("Not supported on scratch surfaces.\n");
2720         return WINED3DERR_INVALIDCALL;
2721     }
2722
2723     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2724
2725     /* Reload if either the texture and sysmem have different ideas about the
2726      * color key, or the actual key values changed. */
2727     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2728             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2729             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2730     {
2731         TRACE("Reloading because of color keying\n");
2732         /* To perform the color key conversion we need a sysmem copy of
2733          * the surface. Make sure we have it. */
2734
2735         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2736         /* Make sure the texture is reloaded because of the color key change,
2737          * this kills performance though :( */
2738         /* TODO: This is not necessarily needed with hw palettized texture support. */
2739         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2740         /* Switching color keying on / off may change the internal format. */
2741         if (ck_changed)
2742             surface_force_reload(surface);
2743     }
2744     else if (!(surface->flags & flag))
2745     {
2746         TRACE("Reloading because surface is dirty.\n");
2747     }
2748     else
2749     {
2750         TRACE("surface is already in texture\n");
2751         return WINED3D_OK;
2752     }
2753
2754     /* No partial locking for textures yet. */
2755     surface_load_location(surface, flag, NULL);
2756     surface_evict_sysmem(surface);
2757
2758     return WINED3D_OK;
2759 }
2760
/* Converts a 32-bit float to the 16-bit half float bit pattern (1 sign bit,
 * 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) yields 0x0000, NaN yields 0x7c01, infinities and
 * values too large for a half float yield (signed) infinity. Values too
 * small for a normalized half float are returned as denormals, truncating
 * towards zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The magnitude is scaled into [2^10, 2^11), so that its integer part
     * is the 10 bit mantissa plus the implicit leading one. */
    static const float mantissa_min = 1024.0f;   /* 2^10 */
    static const float mantissa_limit = 2048.0f; /* 2^11 */
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    if (tmp < mantissa_min)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < mantissa_min);
    }
    else if (tmp >= mantissa_limit)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= mantissa_limit);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up 2047.5 and above overflows the 11 bit mantissa. Carry
     * into the exponent instead of silently masking the overflow away,
     * which would halve the encoded value. */
    if (mantissa == 0x800)
    {
        mantissa = 0x400;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2823
2824 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2825 {
2826     ULONG refcount;
2827
2828     TRACE("Surface %p, container %p of type %#x.\n",
2829             surface, surface->container.u.base, surface->container.type);
2830
2831     switch (surface->container.type)
2832     {
2833         case WINED3D_CONTAINER_TEXTURE:
2834             return wined3d_texture_incref(surface->container.u.texture);
2835
2836         case WINED3D_CONTAINER_SWAPCHAIN:
2837             return wined3d_swapchain_incref(surface->container.u.swapchain);
2838
2839         default:
2840             ERR("Unhandled container type %#x.\n", surface->container.type);
2841         case WINED3D_CONTAINER_NONE:
2842             break;
2843     }
2844
2845     refcount = InterlockedIncrement(&surface->resource.ref);
2846     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2847
2848     return refcount;
2849 }
2850
2851 /* Do not call while under the GL lock. */
2852 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2853 {
2854     ULONG refcount;
2855
2856     TRACE("Surface %p, container %p of type %#x.\n",
2857             surface, surface->container.u.base, surface->container.type);
2858
2859     switch (surface->container.type)
2860     {
2861         case WINED3D_CONTAINER_TEXTURE:
2862             return wined3d_texture_decref(surface->container.u.texture);
2863
2864         case WINED3D_CONTAINER_SWAPCHAIN:
2865             return wined3d_swapchain_decref(surface->container.u.swapchain);
2866
2867         default:
2868             ERR("Unhandled container type %#x.\n", surface->container.type);
2869         case WINED3D_CONTAINER_NONE:
2870             break;
2871     }
2872
2873     refcount = InterlockedDecrement(&surface->resource.ref);
2874     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2875
2876     if (!refcount)
2877     {
2878         surface_cleanup(surface);
2879         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2880
2881         TRACE("Destroyed surface %p.\n", surface);
2882         HeapFree(GetProcessHeap(), 0, surface);
2883     }
2884
2885     return refcount;
2886 }
2887
2888 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2889 {
2890     return resource_set_priority(&surface->resource, priority);
2891 }
2892
2893 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2894 {
2895     return resource_get_priority(&surface->resource);
2896 }
2897
2898 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2899 {
2900     TRACE("surface %p.\n", surface);
2901
2902     if (!surface->resource.device->d3d_initialized)
2903     {
2904         ERR("D3D not initialized.\n");
2905         return;
2906     }
2907
2908     surface_internal_preload(surface, SRGB_ANY);
2909 }
2910
2911 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2912 {
2913     TRACE("surface %p.\n", surface);
2914
2915     return surface->resource.parent;
2916 }
2917
2918 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2919 {
2920     TRACE("surface %p.\n", surface);
2921
2922     return &surface->resource;
2923 }
2924
2925 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2926 {
2927     TRACE("surface %p, flags %#x.\n", surface, flags);
2928
2929     switch (flags)
2930     {
2931         case WINEDDGBS_CANBLT:
2932         case WINEDDGBS_ISBLTDONE:
2933             return WINED3D_OK;
2934
2935         default:
2936             return WINED3DERR_INVALIDCALL;
2937     }
2938 }
2939
2940 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2941 {
2942     TRACE("surface %p, flags %#x.\n", surface, flags);
2943
2944     /* XXX: DDERR_INVALIDSURFACETYPE */
2945
2946     switch (flags)
2947     {
2948         case WINEDDGFS_CANFLIP:
2949         case WINEDDGFS_ISFLIPDONE:
2950             return WINED3D_OK;
2951
2952         default:
2953             return WINED3DERR_INVALIDCALL;
2954     }
2955 }
2956
2957 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2958 {
2959     TRACE("surface %p.\n", surface);
2960
2961     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2962     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2963 }
2964
2965 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2966 {
2967     TRACE("surface %p.\n", surface);
2968
2969     surface->flags &= ~SFLAG_LOST;
2970     return WINED3D_OK;
2971 }
2972
2973 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2974 {
2975     TRACE("surface %p, palette %p.\n", surface, palette);
2976
2977     if (surface->palette == palette)
2978     {
2979         TRACE("Nop palette change.\n");
2980         return WINED3D_OK;
2981     }
2982
2983     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2984         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2985
2986     surface->palette = palette;
2987
2988     if (palette)
2989     {
2990         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2991             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2992
2993         surface->surface_ops->surface_realize_palette(surface);
2994     }
2995
2996     return WINED3D_OK;
2997 }
2998
2999 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3000         DWORD flags, const struct wined3d_color_key *color_key)
3001 {
3002     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3003
3004     if (flags & WINEDDCKEY_COLORSPACE)
3005     {
3006         FIXME(" colorkey value not supported (%08x) !\n", flags);
3007         return WINED3DERR_INVALIDCALL;
3008     }
3009
3010     /* Dirtify the surface, but only if a key was changed. */
3011     if (color_key)
3012     {
3013         switch (flags & ~WINEDDCKEY_COLORSPACE)
3014         {
3015             case WINEDDCKEY_DESTBLT:
3016                 surface->dst_blt_color_key = *color_key;
3017                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3018                 break;
3019
3020             case WINEDDCKEY_DESTOVERLAY:
3021                 surface->dst_overlay_color_key = *color_key;
3022                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3023                 break;
3024
3025             case WINEDDCKEY_SRCOVERLAY:
3026                 surface->src_overlay_color_key = *color_key;
3027                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3028                 break;
3029
3030             case WINEDDCKEY_SRCBLT:
3031                 surface->src_blt_color_key = *color_key;
3032                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3033                 break;
3034         }
3035     }
3036     else
3037     {
3038         switch (flags & ~WINEDDCKEY_COLORSPACE)
3039         {
3040             case WINEDDCKEY_DESTBLT:
3041                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3042                 break;
3043
3044             case WINEDDCKEY_DESTOVERLAY:
3045                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3046                 break;
3047
3048             case WINEDDCKEY_SRCOVERLAY:
3049                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3050                 break;
3051
3052             case WINEDDCKEY_SRCBLT:
3053                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3054                 break;
3055         }
3056     }
3057
3058     return WINED3D_OK;
3059 }
3060
3061 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3062 {
3063     TRACE("surface %p.\n", surface);
3064
3065     return surface->palette;
3066 }
3067
3068 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3069 {
3070     const struct wined3d_format *format = surface->resource.format;
3071     DWORD pitch;
3072
3073     TRACE("surface %p.\n", surface);
3074
3075     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3076     {
3077         /* Since compressed formats are block based, pitch means the amount of
3078          * bytes to the next row of block rather than the next row of pixels. */
3079         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3080         pitch = row_block_count * format->block_byte_count;
3081     }
3082     else
3083     {
3084         unsigned char alignment = surface->resource.device->surface_alignment;
3085         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3086         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3087     }
3088
3089     TRACE("Returning %u.\n", pitch);
3090
3091     return pitch;
3092 }
3093
3094 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3095 {
3096     TRACE("surface %p, mem %p.\n", surface, mem);
3097
3098     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3099     {
3100         WARN("Surface is locked or the DC is in use.\n");
3101         return WINED3DERR_INVALIDCALL;
3102     }
3103
3104     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3105     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3106     {
3107         ERR("Not supported on render targets.\n");
3108         return WINED3DERR_INVALIDCALL;
3109     }
3110
3111     if (mem && mem != surface->resource.allocatedMemory)
3112     {
3113         void *release = NULL;
3114
3115         /* Do I have to copy the old surface content? */
3116         if (surface->flags & SFLAG_DIBSECTION)
3117         {
3118             DeleteDC(surface->hDC);
3119             DeleteObject(surface->dib.DIBsection);
3120             surface->dib.bitmap_data = NULL;
3121             surface->resource.allocatedMemory = NULL;
3122             surface->hDC = NULL;
3123             surface->flags &= ~SFLAG_DIBSECTION;
3124         }
3125         else if (!(surface->flags & SFLAG_USERPTR))
3126         {
3127             release = surface->resource.heapMemory;
3128             surface->resource.heapMemory = NULL;
3129         }
3130         surface->resource.allocatedMemory = mem;
3131         surface->flags |= SFLAG_USERPTR;
3132
3133         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3134         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3135
3136         /* For client textures OpenGL has to be notified. */
3137         if (surface->flags & SFLAG_CLIENT)
3138             surface_release_client_storage(surface);
3139
3140         /* Now free the old memory if any. */
3141         HeapFree(GetProcessHeap(), 0, release);
3142     }
3143     else if (surface->flags & SFLAG_USERPTR)
3144     {
3145         /* HeapMemory should be NULL already. */
3146         if (surface->resource.heapMemory)
3147             ERR("User pointer surface has heap memory allocated.\n");
3148
3149         if (!mem)
3150         {
3151             surface->resource.allocatedMemory = NULL;
3152             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3153
3154             if (surface->flags & SFLAG_CLIENT)
3155                 surface_release_client_storage(surface);
3156
3157             surface_prepare_system_memory(surface);
3158         }
3159
3160         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3161     }
3162
3163     return WINED3D_OK;
3164 }
3165
3166 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3167 {
3168     LONG w, h;
3169
3170     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3171
3172     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3173     {
3174         WARN("Not an overlay surface.\n");
3175         return WINEDDERR_NOTAOVERLAYSURFACE;
3176     }
3177
3178     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3179     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3180     surface->overlay_destrect.left = x;
3181     surface->overlay_destrect.top = y;
3182     surface->overlay_destrect.right = x + w;
3183     surface->overlay_destrect.bottom = y + h;
3184
3185     surface_draw_overlay(surface);
3186
3187     return WINED3D_OK;
3188 }
3189
3190 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3191 {
3192     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3193
3194     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3195     {
3196         TRACE("Not an overlay surface.\n");
3197         return WINEDDERR_NOTAOVERLAYSURFACE;
3198     }
3199
3200     if (!surface->overlay_dest)
3201     {
3202         TRACE("Overlay not visible.\n");
3203         *x = 0;
3204         *y = 0;
3205         return WINEDDERR_OVERLAYNOTVISIBLE;
3206     }
3207
3208     *x = surface->overlay_destrect.left;
3209     *y = surface->overlay_destrect.top;
3210
3211     TRACE("Returning position %d, %d.\n", *x, *y);
3212
3213     return WINED3D_OK;
3214 }
3215
3216 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3217         DWORD flags, struct wined3d_surface *ref)
3218 {
3219     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3220
3221     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3222     {
3223         TRACE("Not an overlay surface.\n");
3224         return WINEDDERR_NOTAOVERLAYSURFACE;
3225     }
3226
3227     return WINED3D_OK;
3228 }
3229
3230 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3231         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3232 {
3233     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3234             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3235
3236     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3237     {
3238         WARN("Not an overlay surface.\n");
3239         return WINEDDERR_NOTAOVERLAYSURFACE;
3240     }
3241     else if (!dst_surface)
3242     {
3243         WARN("Dest surface is NULL.\n");
3244         return WINED3DERR_INVALIDCALL;
3245     }
3246
3247     if (src_rect)
3248     {
3249         surface->overlay_srcrect = *src_rect;
3250     }
3251     else
3252     {
3253         surface->overlay_srcrect.left = 0;
3254         surface->overlay_srcrect.top = 0;
3255         surface->overlay_srcrect.right = surface->resource.width;
3256         surface->overlay_srcrect.bottom = surface->resource.height;
3257     }
3258
3259     if (dst_rect)
3260     {
3261         surface->overlay_destrect = *dst_rect;
3262     }
3263     else
3264     {
3265         surface->overlay_destrect.left = 0;
3266         surface->overlay_destrect.top = 0;
3267         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3268         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3269     }
3270
3271     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3272     {
3273         surface->overlay_dest = NULL;
3274         list_remove(&surface->overlay_entry);
3275     }
3276
3277     if (flags & WINEDDOVER_SHOW)
3278     {
3279         if (surface->overlay_dest != dst_surface)
3280         {
3281             surface->overlay_dest = dst_surface;
3282             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3283         }
3284     }
3285     else if (flags & WINEDDOVER_HIDE)
3286     {
3287         /* tests show that the rectangles are erased on hide */
3288         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3289         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3290         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3291         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3292         surface->overlay_dest = NULL;
3293     }
3294
3295     surface_draw_overlay(surface);
3296
3297     return WINED3D_OK;
3298 }
3299
3300 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3301         UINT width, UINT height, enum wined3d_format_id format_id,
3302         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3303 {
3304     struct wined3d_device *device = surface->resource.device;
3305     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3306     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3307     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3308
3309     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3310             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3311
3312     if (!resource_size)
3313         return WINED3DERR_INVALIDCALL;
3314
3315     if (device->d3d_initialized)
3316         surface->resource.resource_ops->resource_unload(&surface->resource);
3317
3318     if (surface->flags & SFLAG_DIBSECTION)
3319     {
3320         DeleteDC(surface->hDC);
3321         DeleteObject(surface->dib.DIBsection);
3322         surface->dib.bitmap_data = NULL;
3323         surface->flags &= ~SFLAG_DIBSECTION;
3324     }
3325
3326     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3327     surface->resource.allocatedMemory = NULL;
3328     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3329     surface->resource.heapMemory = NULL;
3330
3331     surface->resource.width = width;
3332     surface->resource.height = height;
3333     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3334             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3335     {
3336         surface->pow2Width = width;
3337         surface->pow2Height = height;
3338     }
3339     else
3340     {
3341         surface->pow2Width = surface->pow2Height = 1;
3342         while (surface->pow2Width < width)
3343             surface->pow2Width <<= 1;
3344         while (surface->pow2Height < height)
3345             surface->pow2Height <<= 1;
3346     }
3347
3348     if (surface->pow2Width != width || surface->pow2Height != height)
3349         surface->flags |= SFLAG_NONPOW2;
3350     else
3351         surface->flags &= ~SFLAG_NONPOW2;
3352
3353     surface->resource.format = format;
3354     surface->resource.multisample_type = multisample_type;
3355     surface->resource.multisample_quality = multisample_quality;
3356     surface->resource.size = resource_size;
3357
3358     if (!surface_init_sysmem(surface))
3359         return E_OUTOFMEMORY;
3360
3361     return WINED3D_OK;
3362 }
3363
3364 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3365         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3366 {
3367     unsigned short *dst_s;
3368     const float *src_f;
3369     unsigned int x, y;
3370
3371     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3372
3373     for (y = 0; y < h; ++y)
3374     {
3375         src_f = (const float *)(src + y * pitch_in);
3376         dst_s = (unsigned short *) (dst + y * pitch_out);
3377         for (x = 0; x < w; ++x)
3378         {
3379             dst_s[x] = float_32_to_16(src_f + x);
3380         }
3381     }
3382 }
3383
3384 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3385         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3386 {
3387     static const unsigned char convert_5to8[] =
3388     {
3389         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3390         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3391         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3392         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3393     };
3394     static const unsigned char convert_6to8[] =
3395     {
3396         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3397         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3398         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3399         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3400         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3401         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3402         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3403         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3404     };
3405     unsigned int x, y;
3406
3407     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3408
3409     for (y = 0; y < h; ++y)
3410     {
3411         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3412         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3413         for (x = 0; x < w; ++x)
3414         {
3415             WORD pixel = src_line[x];
3416             dst_line[x] = 0xff000000
3417                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3418                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3419                     | convert_5to8[(pixel & 0x001f)];
3420         }
3421     }
3422 }
3423
3424 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3425  * in both cases we're just setting the X / Alpha channel to 0xff. */
3426 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3427         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3428 {
3429     unsigned int x, y;
3430
3431     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3432
3433     for (y = 0; y < h; ++y)
3434     {
3435         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3436         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3437
3438         for (x = 0; x < w; ++x)
3439         {
3440             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3441         }
3442     }
3443 }
3444
3445 static inline BYTE cliptobyte(int x)
3446 {
3447     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3448 }
3449
3450 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3451         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3452 {
3453     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3454     unsigned int x, y;
3455
3456     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3457
3458     for (y = 0; y < h; ++y)
3459     {
3460         const BYTE *src_line = src + y * pitch_in;
3461         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3462         for (x = 0; x < w; ++x)
3463         {
3464             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3465              *     C = Y - 16; D = U - 128; E = V - 128;
3466              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3467              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3468              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3469              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3470              * U and V are shared between the pixels. */
3471             if (!(x & 1)) /* For every even pixel, read new U and V. */
3472             {
3473                 d = (int) src_line[1] - 128;
3474                 e = (int) src_line[3] - 128;
3475                 r2 = 409 * e + 128;
3476                 g2 = - 100 * d - 208 * e + 128;
3477                 b2 = 516 * d + 128;
3478             }
3479             c2 = 298 * ((int) src_line[0] - 16);
3480             dst_line[x] = 0xff000000
3481                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3482                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3483                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3484                 /* Scale RGB values to 0..255 range,
3485                  * then clip them if still not in range (may be negative),
3486                  * then shift them within DWORD if necessary. */
3487             src_line += 2;
3488         }
3489     }
3490 }
3491
3492 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3493         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3494 {
3495     unsigned int x, y;
3496     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3497
3498     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3499
3500     for (y = 0; y < h; ++y)
3501     {
3502         const BYTE *src_line = src + y * pitch_in;
3503         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3504         for (x = 0; x < w; ++x)
3505         {
3506             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3507              *     C = Y - 16; D = U - 128; E = V - 128;
3508              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3509              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3510              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3511              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3512              * U and V are shared between the pixels. */
3513             if (!(x & 1)) /* For every even pixel, read new U and V. */
3514             {
3515                 d = (int) src_line[1] - 128;
3516                 e = (int) src_line[3] - 128;
3517                 r2 = 409 * e + 128;
3518                 g2 = - 100 * d - 208 * e + 128;
3519                 b2 = 516 * d + 128;
3520             }
3521             c2 = 298 * ((int) src_line[0] - 16);
3522             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3523                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3524                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3525                 /* Scale RGB values to 0..255 range,
3526                  * then clip them if still not in range (may be negative),
3527                  * then shift them within DWORD if necessary. */
3528             src_line += 2;
3529         }
3530     }
3531 }
3532
3533 struct d3dfmt_convertor_desc
3534 {
3535     enum wined3d_format_id from, to;
3536     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3537 };
3538
3539 static const struct d3dfmt_convertor_desc convertors[] =
3540 {
3541     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3542     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3543     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3544     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3545     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3546     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3547 };
3548
3549 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3550         enum wined3d_format_id to)
3551 {
3552     unsigned int i;
3553
3554     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3555     {
3556         if (convertors[i].from == from && convertors[i].to == to)
3557             return &convertors[i];
3558     }
3559
3560     return NULL;
3561 }
3562
3563 /*****************************************************************************
3564  * surface_convert_format
3565  *
3566  * Creates a duplicate of a surface in a different format. Is used by Blt to
3567  * blit between surfaces with different formats.
3568  *
3569  * Parameters
3570  *  source: Source surface
3571  *  fmt: Requested destination format
3572  *
3573  *****************************************************************************/
3574 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3575 {
3576     struct wined3d_mapped_rect src_map, dst_map;
3577     const struct d3dfmt_convertor_desc *conv;
3578     struct wined3d_surface *ret = NULL;
3579     HRESULT hr;
3580
3581     conv = find_convertor(source->resource.format->id, to_fmt);
3582     if (!conv)
3583     {
3584         FIXME("Cannot find a conversion function from format %s to %s.\n",
3585                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3586         return NULL;
3587     }
3588
3589     wined3d_surface_create(source->resource.device, source->resource.width,
3590             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3D_POOL_SCRATCH,
3591             WINED3D_MULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3592             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3593             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3594     if (!ret)
3595     {
3596         ERR("Failed to create a destination surface for conversion.\n");
3597         return NULL;
3598     }
3599
3600     memset(&src_map, 0, sizeof(src_map));
3601     memset(&dst_map, 0, sizeof(dst_map));
3602
3603     hr = wined3d_surface_map(source, &src_map, NULL, WINED3DLOCK_READONLY);
3604     if (FAILED(hr))
3605     {
3606         ERR("Failed to lock the source surface.\n");
3607         wined3d_surface_decref(ret);
3608         return NULL;
3609     }
3610     hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3DLOCK_READONLY);
3611     if (FAILED(hr))
3612     {
3613         ERR("Failed to lock the destination surface.\n");
3614         wined3d_surface_unmap(source);
3615         wined3d_surface_decref(ret);
3616         return NULL;
3617     }
3618
3619     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3620             source->resource.width, source->resource.height);
3621
3622     wined3d_surface_unmap(ret);
3623     wined3d_surface_unmap(source);
3624
3625     return ret;
3626 }
3627
3628 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3629         unsigned int bpp, UINT pitch, DWORD color)
3630 {
3631     BYTE *first;
3632     int x, y;
3633
3634     /* Do first row */
3635
3636 #define COLORFILL_ROW(type) \
3637 do { \
3638     type *d = (type *)buf; \
3639     for (x = 0; x < width; ++x) \
3640         d[x] = (type)color; \
3641 } while(0)
3642
3643     switch (bpp)
3644     {
3645         case 1:
3646             COLORFILL_ROW(BYTE);
3647             break;
3648
3649         case 2:
3650             COLORFILL_ROW(WORD);
3651             break;
3652
3653         case 3:
3654         {
3655             BYTE *d = buf;
3656             for (x = 0; x < width; ++x, d += 3)
3657             {
3658                 d[0] = (color      ) & 0xFF;
3659                 d[1] = (color >>  8) & 0xFF;
3660                 d[2] = (color >> 16) & 0xFF;
3661             }
3662             break;
3663         }
3664         case 4:
3665             COLORFILL_ROW(DWORD);
3666             break;
3667
3668         default:
3669             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3670             return WINED3DERR_NOTAVAILABLE;
3671     }
3672
3673 #undef COLORFILL_ROW
3674
3675     /* Now copy first row. */
3676     first = buf;
3677     for (y = 1; y < height; ++y)
3678     {
3679         buf += pitch;
3680         memcpy(buf, first, width * bpp);
3681     }
3682
3683     return WINED3D_OK;
3684 }
3685
3686 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3687 {
3688     TRACE("surface %p.\n", surface);
3689
3690     if (!(surface->flags & SFLAG_LOCKED))
3691     {
3692         WARN("Trying to unmap unmapped surface.\n");
3693         return WINEDDERR_NOTLOCKED;
3694     }
3695     surface->flags &= ~SFLAG_LOCKED;
3696
3697     surface->surface_ops->surface_unmap(surface);
3698
3699     return WINED3D_OK;
3700 }
3701
3702 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3703         struct wined3d_mapped_rect *mapped_rect, const RECT *rect, DWORD flags)
3704 {
3705     const struct wined3d_format *format = surface->resource.format;
3706
3707     TRACE("surface %p, mapped_rect %p, rect %s, flags %#x.\n",
3708             surface, mapped_rect, wine_dbgstr_rect(rect), flags);
3709
3710     if (surface->flags & SFLAG_LOCKED)
3711     {
3712         WARN("Surface is already mapped.\n");
3713         return WINED3DERR_INVALIDCALL;
3714     }
3715     if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
3716             && rect && (rect->left || rect->top
3717             || rect->right != surface->resource.width
3718             || rect->bottom != surface->resource.height))
3719     {
3720         UINT width_mask = format->block_width - 1;
3721         UINT height_mask = format->block_height - 1;
3722
3723         if ((rect->left & width_mask) || (rect->right & width_mask)
3724                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3725         {
3726             WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3727                     wine_dbgstr_rect(rect), format->block_width, format->block_height);
3728
3729             if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3730                 return WINED3DERR_INVALIDCALL;
3731         }
3732     }
3733
3734     surface->flags |= SFLAG_LOCKED;
3735
3736     if (!(surface->flags & SFLAG_LOCKABLE))
3737         WARN("Trying to lock unlockable surface.\n");
3738
3739     /* Performance optimization: Count how often a surface is mapped, if it is
3740      * mapped regularly do not throw away the system memory copy. This avoids
3741      * the need to download the surface from OpenGL all the time. The surface
3742      * is still downloaded if the OpenGL texture is changed. */
3743     if (!(surface->flags & SFLAG_DYNLOCK))
3744     {
3745         if (++surface->lockCount > MAXLOCKCOUNT)
3746         {
3747             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3748             surface->flags |= SFLAG_DYNLOCK;
3749         }
3750     }
3751
3752     surface->surface_ops->surface_map(surface, rect, flags);
3753
3754     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3755         mapped_rect->row_pitch = surface->resource.width * format->byte_count;
3756     else
3757         mapped_rect->row_pitch = wined3d_surface_get_pitch(surface);
3758
3759     if (!rect)
3760     {
3761         mapped_rect->data = surface->resource.allocatedMemory;
3762         surface->lockedRect.left = 0;
3763         surface->lockedRect.top = 0;
3764         surface->lockedRect.right = surface->resource.width;
3765         surface->lockedRect.bottom = surface->resource.height;
3766     }
3767     else
3768     {
3769         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3770         {
3771             /* Compressed textures are block based, so calculate the offset of
3772              * the block that contains the top-left pixel of the locked rectangle. */
3773             mapped_rect->data = surface->resource.allocatedMemory
3774                     + ((rect->top / format->block_height) * mapped_rect->row_pitch)
3775                     + ((rect->left / format->block_width) * format->block_byte_count);
3776         }
3777         else
3778         {
3779             mapped_rect->data = surface->resource.allocatedMemory
3780                     + (mapped_rect->row_pitch * rect->top)
3781                     + (rect->left * format->byte_count);
3782         }
3783         surface->lockedRect.left = rect->left;
3784         surface->lockedRect.top = rect->top;
3785         surface->lockedRect.right = rect->right;
3786         surface->lockedRect.bottom = rect->bottom;
3787     }
3788
3789     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3790     TRACE("Returning memory %p, pitch %u.\n", mapped_rect->data, mapped_rect->row_pitch);
3791
3792     return WINED3D_OK;
3793 }
3794
3795 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3796 {
3797     struct wined3d_mapped_rect map;
3798     HRESULT hr;
3799
3800     TRACE("surface %p, dc %p.\n", surface, dc);
3801
3802     if (surface->flags & SFLAG_USERPTR)
3803     {
3804         ERR("Not supported on surfaces with application-provided memory.\n");
3805         return WINEDDERR_NODC;
3806     }
3807
3808     /* Give more detailed info for ddraw. */
3809     if (surface->flags & SFLAG_DCINUSE)
3810         return WINEDDERR_DCALREADYCREATED;
3811
3812     /* Can't GetDC if the surface is locked. */
3813     if (surface->flags & SFLAG_LOCKED)
3814         return WINED3DERR_INVALIDCALL;
3815
3816     /* Create a DIB section if there isn't a dc yet. */
3817     if (!surface->hDC)
3818     {
3819         if (surface->flags & SFLAG_CLIENT)
3820         {
3821             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3822             surface_release_client_storage(surface);
3823         }
3824         hr = surface_create_dib_section(surface);
3825         if (FAILED(hr))
3826             return WINED3DERR_INVALIDCALL;
3827
3828         /* Use the DIB section from now on if we are not using a PBO. */
3829         if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
3830         {
3831             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3832             surface->resource.heapMemory = NULL;
3833             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3834         }
3835     }
3836
3837     /* Map the surface. */
3838     hr = wined3d_surface_map(surface, &map, NULL, 0);
3839     if (FAILED(hr))
3840     {
3841         ERR("Map failed, hr %#x.\n", hr);
3842         return hr;
3843     }
3844
3845     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3846      * activates the allocatedMemory. */
3847     if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
3848         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3849
3850     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3851             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3852     {
3853         /* GetDC on palettized formats is unsupported in D3D9, and the method
3854          * is missing in D3D8, so this should only be used for DX <=7
3855          * surfaces (with non-device palettes). */
3856         const PALETTEENTRY *pal = NULL;
3857
3858         if (surface->palette)
3859         {
3860             pal = surface->palette->palents;
3861         }
3862         else
3863         {
3864             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3865             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3866
3867             if (dds_primary && dds_primary->palette)
3868                 pal = dds_primary->palette->palents;
3869         }
3870
3871         if (pal)
3872         {
3873             RGBQUAD col[256];
3874             unsigned int i;
3875
3876             for (i = 0; i < 256; ++i)
3877             {
3878                 col[i].rgbRed = pal[i].peRed;
3879                 col[i].rgbGreen = pal[i].peGreen;
3880                 col[i].rgbBlue = pal[i].peBlue;
3881                 col[i].rgbReserved = 0;
3882             }
3883             SetDIBColorTable(surface->hDC, 0, 256, col);
3884         }
3885     }
3886
3887     surface->flags |= SFLAG_DCINUSE;
3888
3889     *dc = surface->hDC;
3890     TRACE("Returning dc %p.\n", *dc);
3891
3892     return WINED3D_OK;
3893 }
3894
3895 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3896 {
3897     TRACE("surface %p, dc %p.\n", surface, dc);
3898
3899     if (!(surface->flags & SFLAG_DCINUSE))
3900         return WINEDDERR_NODC;
3901
3902     if (surface->hDC != dc)
3903     {
3904         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3905                 dc, surface->hDC);
3906         return WINEDDERR_NODC;
3907     }
3908
3909     /* Copy the contents of the DIB over to the PBO. */
3910     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
3911         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3912
3913     /* We locked first, so unlock now. */
3914     wined3d_surface_unmap(surface);
3915
3916     surface->flags &= ~SFLAG_DCINUSE;
3917
3918     return WINED3D_OK;
3919 }
3920
3921 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3922 {
3923     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3924
3925     if (flags)
3926     {
3927         static UINT once;
3928         if (!once++)
3929             FIXME("Ignoring flags %#x.\n", flags);
3930         else
3931             WARN("Ignoring flags %#x.\n", flags);
3932     }
3933
3934     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3935     {
3936         ERR("Not supported on swapchain surfaces.\n");
3937         return WINEDDERR_NOTFLIPPABLE;
3938     }
3939
3940     /* Flipping is only supported on render targets and overlays. */
3941     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3942     {
3943         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3944         return WINEDDERR_NOTFLIPPABLE;
3945     }
3946
3947     flip_surface(surface, override);
3948
3949     /* Update overlays if they're visible. */
3950     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3951         return surface_draw_overlay(surface);
3952
3953     return WINED3D_OK;
3954 }
3955
3956 /* Do not call while under the GL lock. */
3957 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3958 {
3959     struct wined3d_device *device = surface->resource.device;
3960
3961     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3962
3963     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3964     {
3965         struct wined3d_texture *texture = surface->container.u.texture;
3966
3967         TRACE("Passing to container (%p).\n", texture);
3968         texture->texture_ops->texture_preload(texture, srgb);
3969     }
3970     else
3971     {
3972         struct wined3d_context *context;
3973
3974         TRACE("(%p) : About to load surface\n", surface);
3975
3976         /* TODO: Use already acquired context when possible. */
3977         context = context_acquire(device, NULL);
3978
3979         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3980
3981         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3982         {
3983             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3984             GLclampf tmp;
3985             tmp = 0.9f;
3986             ENTER_GL();
3987             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3988             LEAVE_GL();
3989         }
3990
3991         context_release(context);
3992     }
3993 }
3994
3995 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3996 {
3997     if (!surface->resource.allocatedMemory)
3998     {
3999         if (!surface->resource.heapMemory)
4000         {
4001             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
4002                     surface->resource.size + RESOURCE_ALIGNMENT)))
4003             {
4004                 ERR("Failed to allocate memory.\n");
4005                 return FALSE;
4006             }
4007         }
4008         else if (!(surface->flags & SFLAG_CLIENT))
4009         {
4010             ERR("Surface %p has heapMemory %p and flags %#x.\n",
4011                     surface, surface->resource.heapMemory, surface->flags);
4012         }
4013
4014         surface->resource.allocatedMemory =
4015             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
4016     }
4017     else
4018     {
4019         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
4020     }
4021
4022     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
4023
4024     return TRUE;
4025 }
4026
4027 /* Read the framebuffer back into the surface */
4028 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4029 {
4030     struct wined3d_device *device = surface->resource.device;
4031     const struct wined3d_gl_info *gl_info;
4032     struct wined3d_context *context;
4033     BYTE *mem;
4034     GLint fmt;
4035     GLint type;
4036     BYTE *row, *top, *bottom;
4037     int i;
4038     BOOL bpp;
4039     RECT local_rect;
4040     BOOL srcIsUpsideDown;
4041     GLint rowLen = 0;
4042     GLint skipPix = 0;
4043     GLint skipRow = 0;
4044
4045     context = context_acquire(device, surface);
4046     context_apply_blit_state(context, device);
4047     gl_info = context->gl_info;
4048
4049     ENTER_GL();
4050
4051     /* Select the correct read buffer, and give some debug output.
4052      * There is no need to keep track of the current read buffer or reset it, every part of the code
4053      * that reads sets the read buffer as desired.
4054      */
4055     if (surface_is_offscreen(surface))
4056     {
4057         /* Mapping the primary render target which is not on a swapchain.
4058          * Read from the back buffer. */
4059         TRACE("Mapping offscreen render target.\n");
4060         glReadBuffer(device->offscreenBuffer);
4061         srcIsUpsideDown = TRUE;
4062     }
4063     else
4064     {
4065         /* Onscreen surfaces are always part of a swapchain */
4066         GLenum buffer = surface_get_gl_buffer(surface);
4067         TRACE("Mapping %#x buffer.\n", buffer);
4068         glReadBuffer(buffer);
4069         checkGLcall("glReadBuffer");
4070         srcIsUpsideDown = FALSE;
4071     }
4072
4073     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4074     if (!rect)
4075     {
4076         local_rect.left = 0;
4077         local_rect.top = 0;
4078         local_rect.right = surface->resource.width;
4079         local_rect.bottom = surface->resource.height;
4080     }
4081     else
4082     {
4083         local_rect = *rect;
4084     }
4085     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4086
4087     switch (surface->resource.format->id)
4088     {
4089         case WINED3DFMT_P8_UINT:
4090         {
4091             if (primary_render_target_is_p8(device))
4092             {
4093                 /* In case of P8 render targets the index is stored in the alpha component */
4094                 fmt = GL_ALPHA;
4095                 type = GL_UNSIGNED_BYTE;
4096                 mem = dest;
4097                 bpp = surface->resource.format->byte_count;
4098             }
4099             else
4100             {
4101                 /* GL can't return palettized data, so read ARGB pixels into a
4102                  * separate block of memory and convert them into palettized format
4103                  * in software. Slow, but if the app means to use palettized render
4104                  * targets and locks it...
4105                  *
4106                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4107                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4108                  * for the color channels when palettizing the colors.
4109                  */
4110                 fmt = GL_RGB;
4111                 type = GL_UNSIGNED_BYTE;
4112                 pitch *= 3;
4113                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4114                 if (!mem)
4115                 {
4116                     ERR("Out of memory\n");
4117                     LEAVE_GL();
4118                     return;
4119                 }
4120                 bpp = surface->resource.format->byte_count * 3;
4121             }
4122         }
4123         break;
4124
4125         default:
4126             mem = dest;
4127             fmt = surface->resource.format->glFormat;
4128             type = surface->resource.format->glType;
4129             bpp = surface->resource.format->byte_count;
4130     }
4131
4132     if (surface->flags & SFLAG_PBO)
4133     {
4134         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4135         checkGLcall("glBindBufferARB");
4136         if (mem)
4137         {
4138             ERR("mem not null for pbo -- unexpected\n");
4139             mem = NULL;
4140         }
4141     }
4142
4143     /* Save old pixel store pack state */
4144     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4145     checkGLcall("glGetIntegerv");
4146     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4147     checkGLcall("glGetIntegerv");
4148     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4149     checkGLcall("glGetIntegerv");
4150
4151     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4152     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4153     checkGLcall("glPixelStorei");
4154     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4155     checkGLcall("glPixelStorei");
4156     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4157     checkGLcall("glPixelStorei");
4158
4159     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4160             local_rect.right - local_rect.left,
4161             local_rect.bottom - local_rect.top,
4162             fmt, type, mem);
4163     checkGLcall("glReadPixels");
4164
4165     /* Reset previous pixel store pack state */
4166     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4167     checkGLcall("glPixelStorei");
4168     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4169     checkGLcall("glPixelStorei");
4170     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4171     checkGLcall("glPixelStorei");
4172
4173     if (surface->flags & SFLAG_PBO)
4174     {
4175         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4176         checkGLcall("glBindBufferARB");
4177
4178         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4179          * to get a pointer to it and perform the flipping in software. This is a lot
4180          * faster than calling glReadPixels for each line. In case we want more speed
4181          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4182         if (!srcIsUpsideDown)
4183         {
4184             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4185             checkGLcall("glBindBufferARB");
4186
4187             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4188             checkGLcall("glMapBufferARB");
4189         }
4190     }
4191
4192     /* TODO: Merge this with the palettization loop below for P8 targets */
4193     if(!srcIsUpsideDown) {
4194         UINT len, off;
4195         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4196             Flip the lines in software */
4197         len = (local_rect.right - local_rect.left) * bpp;
4198         off = local_rect.left * bpp;
4199
4200         row = HeapAlloc(GetProcessHeap(), 0, len);
4201         if(!row) {
4202             ERR("Out of memory\n");
4203             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4204                 HeapFree(GetProcessHeap(), 0, mem);
4205             LEAVE_GL();
4206             return;
4207         }
4208
4209         top = mem + pitch * local_rect.top;
4210         bottom = mem + pitch * (local_rect.bottom - 1);
4211         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4212             memcpy(row, top + off, len);
4213             memcpy(top + off, bottom + off, len);
4214             memcpy(bottom + off, row, len);
4215             top += pitch;
4216             bottom -= pitch;
4217         }
4218         HeapFree(GetProcessHeap(), 0, row);
4219
4220         /* Unmap the temp PBO buffer */
4221         if (surface->flags & SFLAG_PBO)
4222         {
4223             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4224             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4225         }
4226     }
4227
4228     LEAVE_GL();
4229     context_release(context);
4230
4231     /* For P8 textures we need to perform an inverse palette lookup. This is
4232      * done by searching for a palette index which matches the RGB value.
4233      * Note this isn't guaranteed to work when there are multiple entries for
4234      * the same color but we have no choice. In case of P8 render targets,
4235      * the index is stored in the alpha component so no conversion is needed. */
4236     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4237     {
4238         const PALETTEENTRY *pal = NULL;
4239         DWORD width = pitch / 3;
4240         int x, y, c;
4241
4242         if (surface->palette)
4243         {
4244             pal = surface->palette->palents;
4245         }
4246         else
4247         {
4248             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4249             HeapFree(GetProcessHeap(), 0, mem);
4250             return;
4251         }
4252
4253         for(y = local_rect.top; y < local_rect.bottom; y++) {
4254             for(x = local_rect.left; x < local_rect.right; x++) {
4255                 /*                      start              lines            pixels      */
4256                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4257                 const BYTE *green = blue  + 1;
4258                 const BYTE *red = green + 1;
4259
4260                 for(c = 0; c < 256; c++) {
4261                     if(*red   == pal[c].peRed   &&
4262                        *green == pal[c].peGreen &&
4263                        *blue  == pal[c].peBlue)
4264                     {
4265                         *((BYTE *) dest + y * width + x) = c;
4266                         break;
4267                     }
4268                 }
4269             }
4270         }
4271         HeapFree(GetProcessHeap(), 0, mem);
4272     }
4273 }
4274
4275 /* Read the framebuffer contents into a texture. Note that this function
4276  * doesn't do any kind of flipping. Using this on an onscreen surface will
4277  * result in a flipped D3D texture. */
4278 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4279 {
4280     struct wined3d_device *device = surface->resource.device;
4281     struct wined3d_context *context;
4282
4283     context = context_acquire(device, surface);
4284     device_invalidate_state(device, STATE_FRAMEBUFFER);
4285
4286     surface_prepare_texture(surface, context, srgb);
4287     surface_bind_and_dirtify(surface, context, srgb);
4288
4289     TRACE("Reading back offscreen render target %p.\n", surface);
4290
4291     ENTER_GL();
4292
4293     if (surface_is_offscreen(surface))
4294         glReadBuffer(device->offscreenBuffer);
4295     else
4296         glReadBuffer(surface_get_gl_buffer(surface));
4297     checkGLcall("glReadBuffer");
4298
4299     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4300             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4301     checkGLcall("glCopyTexSubImage2D");
4302
4303     LEAVE_GL();
4304
4305     context_release(context);
4306 }
4307
4308 /* Context activation is done by the caller. */
4309 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4310         struct wined3d_context *context, BOOL srgb)
4311 {
4312     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4313     enum wined3d_conversion_type convert;
4314     struct wined3d_format format;
4315
4316     if (surface->flags & alloc_flag) return;
4317
4318     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4319     if (convert != WINED3D_CT_NONE || format.convert)
4320         surface->flags |= SFLAG_CONVERTED;
4321     else surface->flags &= ~SFLAG_CONVERTED;
4322
4323     surface_bind_and_dirtify(surface, context, srgb);
4324     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4325     surface->flags |= alloc_flag;
4326 }
4327
4328 /* Context activation is done by the caller. */
4329 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4330 {
4331     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4332     {
4333         struct wined3d_texture *texture = surface->container.u.texture;
4334         UINT sub_count = texture->level_count * texture->layer_count;
4335         UINT i;
4336
4337         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4338
4339         for (i = 0; i < sub_count; ++i)
4340         {
4341             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4342             surface_prepare_texture_internal(s, context, srgb);
4343         }
4344
4345         return;
4346     }
4347
4348     surface_prepare_texture_internal(surface, context, srgb);
4349 }
4350
4351 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4352 {
4353     if (multisample)
4354     {
4355         if (surface->rb_multisample)
4356             return;
4357
4358         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4359         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4360         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4361                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4362         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4363     }
4364     else
4365     {
4366         if (surface->rb_resolved)
4367             return;
4368
4369         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4370         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4371         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4372                 surface->pow2Width, surface->pow2Height);
4373         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4374     }
4375 }
4376
4377 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4378         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4379 {
4380     struct wined3d_device *device = surface->resource.device;
4381     UINT pitch = wined3d_surface_get_pitch(surface);
4382     const struct wined3d_gl_info *gl_info;
4383     struct wined3d_context *context;
4384     RECT local_rect;
4385     UINT w, h;
4386
4387     surface_get_rect(surface, rect, &local_rect);
4388
4389     mem += local_rect.top * pitch + local_rect.left * bpp;
4390     w = local_rect.right - local_rect.left;
4391     h = local_rect.bottom - local_rect.top;
4392
4393     /* Activate the correct context for the render target */
4394     context = context_acquire(device, surface);
4395     context_apply_blit_state(context, device);
4396     gl_info = context->gl_info;
4397
4398     ENTER_GL();
4399
4400     if (!surface_is_offscreen(surface))
4401     {
4402         GLenum buffer = surface_get_gl_buffer(surface);
4403         TRACE("Unlocking %#x buffer.\n", buffer);
4404         context_set_draw_buffer(context, buffer);
4405
4406         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4407         glPixelZoom(1.0f, -1.0f);
4408     }
4409     else
4410     {
4411         /* Primary offscreen render target */
4412         TRACE("Offscreen render target.\n");
4413         context_set_draw_buffer(context, device->offscreenBuffer);
4414
4415         glPixelZoom(1.0f, 1.0f);
4416     }
4417
4418     glRasterPos3i(local_rect.left, local_rect.top, 1);
4419     checkGLcall("glRasterPos3i");
4420
4421     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4422     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4423
4424     if (surface->flags & SFLAG_PBO)
4425     {
4426         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4427         checkGLcall("glBindBufferARB");
4428     }
4429
4430     glDrawPixels(w, h, fmt, type, mem);
4431     checkGLcall("glDrawPixels");
4432
4433     if (surface->flags & SFLAG_PBO)
4434     {
4435         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4436         checkGLcall("glBindBufferARB");
4437     }
4438
4439     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4440     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4441
4442     LEAVE_GL();
4443
4444     if (wined3d_settings.strict_draw_ordering
4445             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4446             && surface->container.u.swapchain->front_buffer == surface))
4447         wglFlush();
4448
4449     context_release(context);
4450 }
4451
4452 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck, BOOL use_texturing,
4453         struct wined3d_format *format, enum wined3d_conversion_type *conversion_type)
4454 {
4455     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4456     const struct wined3d_device *device = surface->resource.device;
4457     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4458     BOOL blit_supported = FALSE;
4459
4460     /* Copy the default values from the surface. Below we might perform fixups */
4461     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4462     *format = *surface->resource.format;
4463     *conversion_type = WINED3D_CT_NONE;
4464
4465     /* Ok, now look if we have to do any conversion */
4466     switch (surface->resource.format->id)
4467     {
4468         case WINED3DFMT_P8_UINT:
4469             /* Below the call to blit_supported is disabled for Wine 1.2
4470              * because the function isn't operating correctly yet. At the
4471              * moment 8-bit blits are handled in software and if certain GL
4472              * extensions are around, surface conversion is performed at
4473              * upload time. The blit_supported call recognizes it as a
4474              * destination fixup. This type of upload 'fixup' and 8-bit to
4475              * 8-bit blits need to be handled by the blit_shader.
4476              * TODO: get rid of this #if 0. */
4477 #if 0
4478             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4479                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4480                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4481 #endif
4482             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4483
4484             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4485              * texturing. Further also use conversion in case of color keying.
4486              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4487              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4488              * conflicts with this.
4489              */
4490             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4491                     || colorkey_active || !use_texturing)
4492             {
4493                 format->glFormat = GL_RGBA;
4494                 format->glInternal = GL_RGBA;
4495                 format->glType = GL_UNSIGNED_BYTE;
4496                 format->conv_byte_count = 4;
4497                 if (colorkey_active)
4498                     *conversion_type = WINED3D_CT_PALETTED_CK;
4499                 else
4500                     *conversion_type = WINED3D_CT_PALETTED;
4501             }
4502             break;
4503
4504         case WINED3DFMT_B2G3R3_UNORM:
4505             /* **********************
4506                 GL_UNSIGNED_BYTE_3_3_2
4507                 ********************** */
4508             if (colorkey_active) {
4509                 /* This texture format will never be used.. So do not care about color keying
4510                     up until the point in time it will be needed :-) */
4511                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4512             }
4513             break;
4514
4515         case WINED3DFMT_B5G6R5_UNORM:
4516             if (colorkey_active)
4517             {
4518                 *conversion_type = WINED3D_CT_CK_565;
4519                 format->glFormat = GL_RGBA;
4520                 format->glInternal = GL_RGB5_A1;
4521                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4522                 format->conv_byte_count = 2;
4523             }
4524             break;
4525
4526         case WINED3DFMT_B5G5R5X1_UNORM:
4527             if (colorkey_active)
4528             {
4529                 *conversion_type = WINED3D_CT_CK_5551;
4530                 format->glFormat = GL_BGRA;
4531                 format->glInternal = GL_RGB5_A1;
4532                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4533                 format->conv_byte_count = 2;
4534             }
4535             break;
4536
4537         case WINED3DFMT_B8G8R8_UNORM:
4538             if (colorkey_active)
4539             {
4540                 *conversion_type = WINED3D_CT_CK_RGB24;
4541                 format->glFormat = GL_RGBA;
4542                 format->glInternal = GL_RGBA8;
4543                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4544                 format->conv_byte_count = 4;
4545             }
4546             break;
4547
4548         case WINED3DFMT_B8G8R8X8_UNORM:
4549             if (colorkey_active)
4550             {
4551                 *conversion_type = WINED3D_CT_RGB32_888;
4552                 format->glFormat = GL_RGBA;
4553                 format->glInternal = GL_RGBA8;
4554                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4555                 format->conv_byte_count = 4;
4556             }
4557             break;
4558
4559         case WINED3DFMT_B8G8R8A8_UNORM:
4560             if (colorkey_active)
4561             {
4562                 *conversion_type = WINED3D_CT_CK_ARGB32;
4563                 format->conv_byte_count = 4;
4564             }
4565             break;
4566
4567         default:
4568             break;
4569     }
4570
4571     if (*conversion_type != WINED3D_CT_NONE)
4572     {
4573         format->rtInternal = format->glInternal;
4574         format->glGammaInternal = format->glInternal;
4575     }
4576
4577     return WINED3D_OK;
4578 }
4579
4580 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4581 {
4582     /* FIXME: Is this really how color keys are supposed to work? I think it
4583      * makes more sense to compare the individual channels. */
4584     return color >= color_key->color_space_low_value
4585             && color <= color_key->color_space_high_value;
4586 }
4587
4588 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4589 {
4590     const struct wined3d_device *device = surface->resource.device;
4591     const struct wined3d_palette *pal = surface->palette;
4592     BOOL index_in_alpha = FALSE;
4593     unsigned int i;
4594
4595     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4596      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4597      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4598      * duplicate entries. Store the color key in the unused alpha component to speed the
4599      * download up and to make conversion unneeded. */
4600     index_in_alpha = primary_render_target_is_p8(device);
4601
4602     if (!pal)
4603     {
4604         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4605         if (index_in_alpha)
4606         {
4607             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4608              * there's no palette at this time. */
4609             for (i = 0; i < 256; i++) table[i][3] = i;
4610         }
4611     }
4612     else
4613     {
4614         TRACE("Using surface palette %p\n", pal);
4615         /* Get the surface's palette */
4616         for (i = 0; i < 256; ++i)
4617         {
4618             table[i][0] = pal->palents[i].peRed;
4619             table[i][1] = pal->palents[i].peGreen;
4620             table[i][2] = pal->palents[i].peBlue;
4621
4622             /* When index_in_alpha is set the palette index is stored in the
4623              * alpha component. In case of a readback we can then read
4624              * GL_ALPHA. Color keying is handled in BltOverride using a
4625              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4626              * color key itself is passed to glAlphaFunc in other cases the
4627              * alpha component of pixels that should be masked away is set to 0. */
4628             if (index_in_alpha)
4629                 table[i][3] = i;
4630             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4631                 table[i][3] = 0x00;
4632             else if (pal->flags & WINEDDPCAPS_ALPHA)
4633                 table[i][3] = pal->palents[i].peFlags;
4634             else
4635                 table[i][3] = 0xFF;
4636         }
4637     }
4638 }
4639
4640 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width, UINT height,
4641         UINT outpitch, enum wined3d_conversion_type conversion_type, struct wined3d_surface *surface)
4642 {
4643     const BYTE *source;
4644     BYTE *dest;
4645
4646     TRACE("src %p, dst %p, pitch %u, width %u, height %u, outpitch %u, conversion_type %#x, surface %p.\n",
4647             src, dst, pitch, width, height, outpitch, conversion_type, surface);
4648
4649     switch (conversion_type)
4650     {
4651         case WINED3D_CT_NONE:
4652         {
4653             memcpy(dst, src, pitch * height);
4654             break;
4655         }
4656
4657         case WINED3D_CT_PALETTED:
4658         case WINED3D_CT_PALETTED_CK:
4659         {
4660             BYTE table[256][4];
4661             unsigned int x, y;
4662
4663             d3dfmt_p8_init_palette(surface, table, (conversion_type == WINED3D_CT_PALETTED_CK));
4664
4665             for (y = 0; y < height; y++)
4666             {
4667                 source = src + pitch * y;
4668                 dest = dst + outpitch * y;
4669                 /* This is an 1 bpp format, using the width here is fine */
4670                 for (x = 0; x < width; x++) {
4671                     BYTE color = *source++;
4672                     *dest++ = table[color][0];
4673                     *dest++ = table[color][1];
4674                     *dest++ = table[color][2];
4675                     *dest++ = table[color][3];
4676                 }
4677             }
4678         }
4679         break;
4680
4681         case WINED3D_CT_CK_565:
4682         {
4683             /* Converting the 565 format in 5551 packed to emulate color-keying.
4684
4685               Note : in all these conversion, it would be best to average the averaging
4686                       pixels to get the color of the pixel that will be color-keyed to
4687                       prevent 'color bleeding'. This will be done later on if ever it is
4688                       too visible.
4689
4690               Note2: Nvidia documents say that their driver does not support alpha + color keying
4691                      on the same surface and disables color keying in such a case
4692             */
4693             unsigned int x, y;
4694             const WORD *Source;
4695             WORD *Dest;
4696
4697             TRACE("Color keyed 565\n");
4698
4699             for (y = 0; y < height; y++) {
4700                 Source = (const WORD *)(src + y * pitch);
4701                 Dest = (WORD *) (dst + y * outpitch);
4702                 for (x = 0; x < width; x++ ) {
4703                     WORD color = *Source++;
4704                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4705                     if (!color_in_range(&surface->src_blt_color_key, color))
4706                         *Dest |= 0x0001;
4707                     Dest++;
4708                 }
4709             }
4710         }
4711         break;
4712
4713         case WINED3D_CT_CK_5551:
4714         {
4715             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4716             unsigned int x, y;
4717             const WORD *Source;
4718             WORD *Dest;
4719             TRACE("Color keyed 5551\n");
4720             for (y = 0; y < height; y++) {
4721                 Source = (const WORD *)(src + y * pitch);
4722                 Dest = (WORD *) (dst + y * outpitch);
4723                 for (x = 0; x < width; x++ ) {
4724                     WORD color = *Source++;
4725                     *Dest = color;
4726                     if (!color_in_range(&surface->src_blt_color_key, color))
4727                         *Dest |= (1 << 15);
4728                     else
4729                         *Dest &= ~(1 << 15);
4730                     Dest++;
4731                 }
4732             }
4733         }
4734         break;
4735
4736         case WINED3D_CT_CK_RGB24:
4737         {
4738             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4739             unsigned int x, y;
4740             for (y = 0; y < height; y++)
4741             {
4742                 source = src + pitch * y;
4743                 dest = dst + outpitch * y;
4744                 for (x = 0; x < width; x++) {
4745                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4746                     DWORD dstcolor = color << 8;
4747                     if (!color_in_range(&surface->src_blt_color_key, color))
4748                         dstcolor |= 0xff;
4749                     *(DWORD*)dest = dstcolor;
4750                     source += 3;
4751                     dest += 4;
4752                 }
4753             }
4754         }
4755         break;
4756
4757         case WINED3D_CT_RGB32_888:
4758         {
4759             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4760             unsigned int x, y;
4761             for (y = 0; y < height; y++)
4762             {
4763                 source = src + pitch * y;
4764                 dest = dst + outpitch * y;
4765                 for (x = 0; x < width; x++) {
4766                     DWORD color = 0xffffff & *(const DWORD*)source;
4767                     DWORD dstcolor = color << 8;
4768                     if (!color_in_range(&surface->src_blt_color_key, color))
4769                         dstcolor |= 0xff;
4770                     *(DWORD*)dest = dstcolor;
4771                     source += 4;
4772                     dest += 4;
4773                 }
4774             }
4775         }
4776         break;
4777
4778         case WINED3D_CT_CK_ARGB32:
4779         {
4780             unsigned int x, y;
4781             for (y = 0; y < height; ++y)
4782             {
4783                 source = src + pitch * y;
4784                 dest = dst + outpitch * y;
4785                 for (x = 0; x < width; ++x)
4786                 {
4787                     DWORD color = *(const DWORD *)source;
4788                     if (color_in_range(&surface->src_blt_color_key, color))
4789                         color &= ~0xff000000;
4790                     *(DWORD*)dest = color;
4791                     source += 4;
4792                     dest += 4;
4793                 }
4794             }
4795         }
4796         break;
4797
4798         default:
4799             ERR("Unsupported conversion type %#x.\n", conversion_type);
4800     }
4801     return WINED3D_OK;
4802 }
4803
4804 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4805 {
4806     /* Flip the surface contents */
4807     /* Flip the DC */
4808     {
4809         HDC tmp;
4810         tmp = front->hDC;
4811         front->hDC = back->hDC;
4812         back->hDC = tmp;
4813     }
4814
4815     /* Flip the DIBsection */
4816     {
4817         HBITMAP tmp = front->dib.DIBsection;
4818         front->dib.DIBsection = back->dib.DIBsection;
4819         back->dib.DIBsection = tmp;
4820     }
4821
4822     /* Flip the surface data */
4823     {
4824         void* tmp;
4825
4826         tmp = front->dib.bitmap_data;
4827         front->dib.bitmap_data = back->dib.bitmap_data;
4828         back->dib.bitmap_data = tmp;
4829
4830         tmp = front->resource.allocatedMemory;
4831         front->resource.allocatedMemory = back->resource.allocatedMemory;
4832         back->resource.allocatedMemory = tmp;
4833
4834         tmp = front->resource.heapMemory;
4835         front->resource.heapMemory = back->resource.heapMemory;
4836         back->resource.heapMemory = tmp;
4837     }
4838
4839     /* Flip the PBO */
4840     {
4841         GLuint tmp_pbo = front->pbo;
4842         front->pbo = back->pbo;
4843         back->pbo = tmp_pbo;
4844     }
4845
4846     /* Flip the opengl texture */
4847     {
4848         GLuint tmp;
4849
4850         tmp = back->texture_name;
4851         back->texture_name = front->texture_name;
4852         front->texture_name = tmp;
4853
4854         tmp = back->texture_name_srgb;
4855         back->texture_name_srgb = front->texture_name_srgb;
4856         front->texture_name_srgb = tmp;
4857
4858         tmp = back->rb_multisample;
4859         back->rb_multisample = front->rb_multisample;
4860         front->rb_multisample = tmp;
4861
4862         tmp = back->rb_resolved;
4863         back->rb_resolved = front->rb_resolved;
4864         front->rb_resolved = tmp;
4865
4866         resource_unload(&back->resource);
4867         resource_unload(&front->resource);
4868     }
4869
4870     {
4871         DWORD tmp_flags = back->flags;
4872         back->flags = front->flags;
4873         front->flags = tmp_flags;
4874     }
4875 }
4876
4877 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4878  * pixel copy calls. */
4879 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4880         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4881 {
4882     struct wined3d_device *device = dst_surface->resource.device;
4883     float xrel, yrel;
4884     UINT row;
4885     struct wined3d_context *context;
4886     BOOL upsidedown = FALSE;
4887     RECT dst_rect = *dst_rect_in;
4888
4889     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4890      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4891      */
4892     if(dst_rect.top > dst_rect.bottom) {
4893         UINT tmp = dst_rect.bottom;
4894         dst_rect.bottom = dst_rect.top;
4895         dst_rect.top = tmp;
4896         upsidedown = TRUE;
4897     }
4898
4899     context = context_acquire(device, src_surface);
4900     context_apply_blit_state(context, device);
4901     surface_internal_preload(dst_surface, SRGB_RGB);
4902     ENTER_GL();
4903
4904     /* Bind the target texture */
4905     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4906     if (surface_is_offscreen(src_surface))
4907     {
4908         TRACE("Reading from an offscreen target\n");
4909         upsidedown = !upsidedown;
4910         glReadBuffer(device->offscreenBuffer);
4911     }
4912     else
4913     {
4914         glReadBuffer(surface_get_gl_buffer(src_surface));
4915     }
4916     checkGLcall("glReadBuffer");
4917
4918     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4919     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4920
4921     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4922     {
4923         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4924
4925         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4926             ERR("Texture filtering not supported in direct blit.\n");
4927     }
4928     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4929             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4930     {
4931         ERR("Texture filtering not supported in direct blit\n");
4932     }
4933
4934     if (upsidedown
4935             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4936             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4937     {
4938         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4939
4940         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4941                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4942                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4943                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4944     }
4945     else
4946     {
4947         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4948         /* I have to process this row by row to swap the image,
4949          * otherwise it would be upside down, so stretching in y direction
4950          * doesn't cost extra time
4951          *
4952          * However, stretching in x direction can be avoided if not necessary
4953          */
4954         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4955             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4956             {
4957                 /* Well, that stuff works, but it's very slow.
4958                  * find a better way instead
4959                  */
4960                 UINT col;
4961
4962                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4963                 {
4964                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4965                             dst_rect.left + col /* x offset */, row /* y offset */,
4966                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4967                 }
4968             }
4969             else
4970             {
4971                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4972                         dst_rect.left /* x offset */, row /* y offset */,
4973                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4974             }
4975         }
4976     }
4977     checkGLcall("glCopyTexSubImage2D");
4978
4979     LEAVE_GL();
4980     context_release(context);
4981
4982     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4983      * path is never entered
4984      */
4985     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4986 }
4987
4988 /* Uses the hardware to stretch and flip the image */
4989 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4990         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4991 {
4992     struct wined3d_device *device = dst_surface->resource.device;
4993     struct wined3d_swapchain *src_swapchain = NULL;
4994     GLuint src, backup = 0;
4995     float left, right, top, bottom; /* Texture coordinates */
4996     UINT fbwidth = src_surface->resource.width;
4997     UINT fbheight = src_surface->resource.height;
4998     struct wined3d_context *context;
4999     GLenum drawBuffer = GL_BACK;
5000     GLenum texture_target;
5001     BOOL noBackBufferBackup;
5002     BOOL src_offscreen;
5003     BOOL upsidedown = FALSE;
5004     RECT dst_rect = *dst_rect_in;
5005
5006     TRACE("Using hwstretch blit\n");
5007     /* Activate the Proper context for reading from the source surface, set it up for blitting */
5008     context = context_acquire(device, src_surface);
5009     context_apply_blit_state(context, device);
5010     surface_internal_preload(dst_surface, SRGB_RGB);
5011
5012     src_offscreen = surface_is_offscreen(src_surface);
5013     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
5014     if (!noBackBufferBackup && !src_surface->texture_name)
5015     {
5016         /* Get it a description */
5017         surface_internal_preload(src_surface, SRGB_RGB);
5018     }
5019     ENTER_GL();
5020
5021     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
5022      * This way we don't have to wait for the 2nd readback to finish to leave this function.
5023      */
5024     if (context->aux_buffers >= 2)
5025     {
5026         /* Got more than one aux buffer? Use the 2nd aux buffer */
5027         drawBuffer = GL_AUX1;
5028     }
5029     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
5030     {
5031         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5032         drawBuffer = GL_AUX0;
5033     }
5034
5035     if(noBackBufferBackup) {
5036         glGenTextures(1, &backup);
5037         checkGLcall("glGenTextures");
5038         context_bind_texture(context, GL_TEXTURE_2D, backup);
5039         texture_target = GL_TEXTURE_2D;
5040     } else {
5041         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5042          * we are reading from the back buffer, the backup can be used as source texture
5043          */
5044         texture_target = src_surface->texture_target;
5045         context_bind_texture(context, texture_target, src_surface->texture_name);
5046         glEnable(texture_target);
5047         checkGLcall("glEnable(texture_target)");
5048
5049         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5050         src_surface->flags &= ~SFLAG_INTEXTURE;
5051     }
5052
5053     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5054      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5055      */
5056     if(dst_rect.top > dst_rect.bottom) {
5057         UINT tmp = dst_rect.bottom;
5058         dst_rect.bottom = dst_rect.top;
5059         dst_rect.top = tmp;
5060         upsidedown = TRUE;
5061     }
5062
5063     if (src_offscreen)
5064     {
5065         TRACE("Reading from an offscreen target\n");
5066         upsidedown = !upsidedown;
5067         glReadBuffer(device->offscreenBuffer);
5068     }
5069     else
5070     {
5071         glReadBuffer(surface_get_gl_buffer(src_surface));
5072     }
5073
5074     /* TODO: Only back up the part that will be overwritten */
5075     glCopyTexSubImage2D(texture_target, 0,
5076                         0, 0 /* read offsets */,
5077                         0, 0,
5078                         fbwidth,
5079                         fbheight);
5080
5081     checkGLcall("glCopyTexSubImage2D");
5082
5083     /* No issue with overriding these - the sampler is dirty due to blit usage */
5084     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5085             wined3d_gl_mag_filter(magLookup, filter));
5086     checkGLcall("glTexParameteri");
5087     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5088             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5089     checkGLcall("glTexParameteri");
5090
5091     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5092         src_swapchain = src_surface->container.u.swapchain;
5093     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5094     {
5095         src = backup ? backup : src_surface->texture_name;
5096     }
5097     else
5098     {
5099         glReadBuffer(GL_FRONT);
5100         checkGLcall("glReadBuffer(GL_FRONT)");
5101
5102         glGenTextures(1, &src);
5103         checkGLcall("glGenTextures(1, &src)");
5104         context_bind_texture(context, GL_TEXTURE_2D, src);
5105
5106         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5107          * out for power of 2 sizes
5108          */
5109         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5110                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5111         checkGLcall("glTexImage2D");
5112         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5113                             0, 0 /* read offsets */,
5114                             0, 0,
5115                             fbwidth,
5116                             fbheight);
5117
5118         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5119         checkGLcall("glTexParameteri");
5120         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5121         checkGLcall("glTexParameteri");
5122
5123         glReadBuffer(GL_BACK);
5124         checkGLcall("glReadBuffer(GL_BACK)");
5125
5126         if(texture_target != GL_TEXTURE_2D) {
5127             glDisable(texture_target);
5128             glEnable(GL_TEXTURE_2D);
5129             texture_target = GL_TEXTURE_2D;
5130         }
5131     }
5132     checkGLcall("glEnd and previous");
5133
5134     left = src_rect->left;
5135     right = src_rect->right;
5136
5137     if (!upsidedown)
5138     {
5139         top = src_surface->resource.height - src_rect->top;
5140         bottom = src_surface->resource.height - src_rect->bottom;
5141     }
5142     else
5143     {
5144         top = src_surface->resource.height - src_rect->bottom;
5145         bottom = src_surface->resource.height - src_rect->top;
5146     }
5147
5148     if (src_surface->flags & SFLAG_NORMCOORD)
5149     {
5150         left /= src_surface->pow2Width;
5151         right /= src_surface->pow2Width;
5152         top /= src_surface->pow2Height;
5153         bottom /= src_surface->pow2Height;
5154     }
5155
5156     /* draw the source texture stretched and upside down. The correct surface is bound already */
5157     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5158     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5159
5160     context_set_draw_buffer(context, drawBuffer);
5161     glReadBuffer(drawBuffer);
5162
5163     glBegin(GL_QUADS);
5164         /* bottom left */
5165         glTexCoord2f(left, bottom);
5166         glVertex2i(0, 0);
5167
5168         /* top left */
5169         glTexCoord2f(left, top);
5170         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5171
5172         /* top right */
5173         glTexCoord2f(right, top);
5174         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5175
5176         /* bottom right */
5177         glTexCoord2f(right, bottom);
5178         glVertex2i(dst_rect.right - dst_rect.left, 0);
5179     glEnd();
5180     checkGLcall("glEnd and previous");
5181
5182     if (texture_target != dst_surface->texture_target)
5183     {
5184         glDisable(texture_target);
5185         glEnable(dst_surface->texture_target);
5186         texture_target = dst_surface->texture_target;
5187     }
5188
5189     /* Now read the stretched and upside down image into the destination texture */
5190     context_bind_texture(context, texture_target, dst_surface->texture_name);
5191     glCopyTexSubImage2D(texture_target,
5192                         0,
5193                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5194                         0, 0, /* We blitted the image to the origin */
5195                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5196     checkGLcall("glCopyTexSubImage2D");
5197
5198     if(drawBuffer == GL_BACK) {
5199         /* Write the back buffer backup back */
5200         if(backup) {
5201             if(texture_target != GL_TEXTURE_2D) {
5202                 glDisable(texture_target);
5203                 glEnable(GL_TEXTURE_2D);
5204                 texture_target = GL_TEXTURE_2D;
5205             }
5206             context_bind_texture(context, GL_TEXTURE_2D, backup);
5207         }
5208         else
5209         {
5210             if (texture_target != src_surface->texture_target)
5211             {
5212                 glDisable(texture_target);
5213                 glEnable(src_surface->texture_target);
5214                 texture_target = src_surface->texture_target;
5215             }
5216             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5217         }
5218
5219         glBegin(GL_QUADS);
5220             /* top left */
5221             glTexCoord2f(0.0f, 0.0f);
5222             glVertex2i(0, fbheight);
5223
5224             /* bottom left */
5225             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5226             glVertex2i(0, 0);
5227
5228             /* bottom right */
5229             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5230                     (float)fbheight / (float)src_surface->pow2Height);
5231             glVertex2i(fbwidth, 0);
5232
5233             /* top right */
5234             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5235             glVertex2i(fbwidth, fbheight);
5236         glEnd();
5237     }
5238     glDisable(texture_target);
5239     checkGLcall("glDisable(texture_target)");
5240
5241     /* Cleanup */
5242     if (src != src_surface->texture_name && src != backup)
5243     {
5244         glDeleteTextures(1, &src);
5245         checkGLcall("glDeleteTextures(1, &src)");
5246     }
5247     if(backup) {
5248         glDeleteTextures(1, &backup);
5249         checkGLcall("glDeleteTextures(1, &backup)");
5250     }
5251
5252     LEAVE_GL();
5253
5254     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5255
5256     context_release(context);
5257
5258     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5259      * path is never entered
5260      */
5261     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5262 }
5263
5264 /* Front buffer coordinates are always full screen coordinates, but our GL
5265  * drawable is limited to the window's client area. The sysmem and texture
5266  * copies do have the full screen size. Note that GL has a bottom-left
5267  * origin, while D3D has a top-left origin. */
5268 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5269 {
5270     UINT drawable_height;
5271
5272     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5273             && surface == surface->container.u.swapchain->front_buffer)
5274     {
5275         POINT offset = {0, 0};
5276         RECT windowsize;
5277
5278         ScreenToClient(window, &offset);
5279         OffsetRect(rect, offset.x, offset.y);
5280
5281         GetClientRect(window, &windowsize);
5282         drawable_height = windowsize.bottom - windowsize.top;
5283     }
5284     else
5285     {
5286         drawable_height = surface->resource.height;
5287     }
5288
5289     rect->top = drawable_height - rect->top;
5290     rect->bottom = drawable_height - rect->bottom;
5291 }
5292
5293 static void surface_blt_to_drawable(const struct wined3d_device *device,
5294         enum wined3d_texture_filter_type filter, BOOL color_key,
5295         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5296         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5297 {
5298     struct wined3d_context *context;
5299     RECT src_rect, dst_rect;
5300
5301     src_rect = *src_rect_in;
5302     dst_rect = *dst_rect_in;
5303
5304     /* Make sure the surface is up-to-date. This should probably use
5305      * surface_load_location() and worry about the destination surface too,
5306      * unless we're overwriting it completely. */
5307     surface_internal_preload(src_surface, SRGB_RGB);
5308
5309     /* Activate the destination context, set it up for blitting */
5310     context = context_acquire(device, dst_surface);
5311     context_apply_blit_state(context, device);
5312
5313     if (!surface_is_offscreen(dst_surface))
5314         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5315
5316     device->blitter->set_shader(device->blit_priv, context, src_surface);
5317
5318     ENTER_GL();
5319
5320     if (color_key)
5321     {
5322         glEnable(GL_ALPHA_TEST);
5323         checkGLcall("glEnable(GL_ALPHA_TEST)");
5324
5325         /* When the primary render target uses P8, the alpha component
5326          * contains the palette index. Which means that the colorkey is one of
5327          * the palette entries. In other cases pixels that should be masked
5328          * away have alpha set to 0. */
5329         if (primary_render_target_is_p8(device))
5330             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5331         else
5332             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5333         checkGLcall("glAlphaFunc");
5334     }
5335     else
5336     {
5337         glDisable(GL_ALPHA_TEST);
5338         checkGLcall("glDisable(GL_ALPHA_TEST)");
5339     }
5340
5341     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5342
5343     if (color_key)
5344     {
5345         glDisable(GL_ALPHA_TEST);
5346         checkGLcall("glDisable(GL_ALPHA_TEST)");
5347     }
5348
5349     LEAVE_GL();
5350
5351     /* Leave the opengl state valid for blitting */
5352     device->blitter->unset_shader(context->gl_info);
5353
5354     if (wined3d_settings.strict_draw_ordering
5355             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5356             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5357         wglFlush(); /* Flush to ensure ordering across contexts. */
5358
5359     context_release(context);
5360 }
5361
5362 /* Do not call while under the GL lock. */
5363 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5364 {
5365     struct wined3d_device *device = s->resource.device;
5366     const struct blit_shader *blitter;
5367
5368     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5369             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5370     if (!blitter)
5371     {
5372         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5373         return WINED3DERR_INVALIDCALL;
5374     }
5375
5376     return blitter->color_fill(device, s, rect, color);
5377 }
5378
5379 /* Do not call while under the GL lock. */
5380 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5381         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5382         enum wined3d_texture_filter_type filter)
5383 {
5384     struct wined3d_device *device = dst_surface->resource.device;
5385     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5386     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5387
5388     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5389             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5390             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5391
5392     /* Get the swapchain. One of the surfaces has to be a primary surface */
5393     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5394     {
5395         WARN("Destination is in sysmem, rejecting gl blt\n");
5396         return WINED3DERR_INVALIDCALL;
5397     }
5398
5399     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5400         dstSwapchain = dst_surface->container.u.swapchain;
5401
5402     if (src_surface)
5403     {
5404         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5405         {
5406             WARN("Src is in sysmem, rejecting gl blt\n");
5407             return WINED3DERR_INVALIDCALL;
5408         }
5409
5410         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5411             srcSwapchain = src_surface->container.u.swapchain;
5412     }
5413
5414     /* Early sort out of cases where no render target is used */
5415     if (!dstSwapchain && !srcSwapchain
5416             && src_surface != device->fb.render_targets[0]
5417             && dst_surface != device->fb.render_targets[0])
5418     {
5419         TRACE("No surface is render target, not using hardware blit.\n");
5420         return WINED3DERR_INVALIDCALL;
5421     }
5422
5423     /* No destination color keying supported */
5424     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5425     {
5426         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5427         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5428         return WINED3DERR_INVALIDCALL;
5429     }
5430
5431     if (dstSwapchain && dstSwapchain == srcSwapchain)
5432     {
5433         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5434         return WINED3DERR_INVALIDCALL;
5435     }
5436
5437     if (dstSwapchain && srcSwapchain)
5438     {
5439         FIXME("Implement hardware blit between two different swapchains\n");
5440         return WINED3DERR_INVALIDCALL;
5441     }
5442
5443     if (dstSwapchain)
5444     {
5445         /* Handled with regular texture -> swapchain blit */
5446         if (src_surface == device->fb.render_targets[0])
5447             TRACE("Blit from active render target to a swapchain\n");
5448     }
5449     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5450     {
5451         FIXME("Implement blit from a swapchain to the active render target\n");
5452         return WINED3DERR_INVALIDCALL;
5453     }
5454
5455     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5456     {
5457         /* Blit from render target to texture */
5458         BOOL stretchx;
5459
5460         /* P8 read back is not implemented */
5461         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5462                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5463         {
5464             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5465             return WINED3DERR_INVALIDCALL;
5466         }
5467
5468         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5469         {
5470             TRACE("Color keying not supported by frame buffer to texture blit\n");
5471             return WINED3DERR_INVALIDCALL;
5472             /* Destination color key is checked above */
5473         }
5474
5475         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5476             stretchx = TRUE;
5477         else
5478             stretchx = FALSE;
5479
5480         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5481          * flip the image nor scale it.
5482          *
5483          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5484          * -> If the app wants a image width an unscaled width, copy it line per line
5485          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5486          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5487          *    back buffer. This is slower than reading line per line, thus not used for flipping
5488          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5489          *    pixel by pixel. */
5490         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5491                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5492         {
5493             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5494             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5495         }
5496         else
5497         {
5498             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5499             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5500         }
5501
5502         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5503         {
5504             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5505             dst_surface->resource.allocatedMemory = NULL;
5506             dst_surface->resource.heapMemory = NULL;
5507         }
5508         else
5509         {
5510             dst_surface->flags &= ~SFLAG_INSYSMEM;
5511         }
5512
5513         return WINED3D_OK;
5514     }
5515     else if (src_surface)
5516     {
5517         /* Blit from offscreen surface to render target */
5518         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5519         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5520
5521         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5522
5523         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5524                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5525                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5526         {
5527             FIXME("Unsupported blit operation falling back to software\n");
5528             return WINED3DERR_INVALIDCALL;
5529         }
5530
5531         /* Color keying: Check if we have to do a color keyed blt,
5532          * and if not check if a color key is activated.
5533          *
5534          * Just modify the color keying parameters in the surface and restore them afterwards
5535          * The surface keeps track of the color key last used to load the opengl surface.
5536          * PreLoad will catch the change to the flags and color key and reload if necessary.
5537          */
5538         if (flags & WINEDDBLT_KEYSRC)
5539         {
5540             /* Use color key from surface */
5541         }
5542         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5543         {
5544             /* Use color key from DDBltFx */
5545             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5546             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5547         }
5548         else
5549         {
5550             /* Do not use color key */
5551             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5552         }
5553
5554         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5555                 src_surface, src_rect, dst_surface, dst_rect);
5556
5557         /* Restore the color key parameters */
5558         src_surface->CKeyFlags = oldCKeyFlags;
5559         src_surface->src_blt_color_key = old_blt_key;
5560
5561         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5562
5563         return WINED3D_OK;
5564     }
5565
5566     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5567     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5568     return WINED3DERR_INVALIDCALL;
5569 }
5570
5571 /* GL locking is done by the caller */
5572 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5573         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5574 {
5575     struct wined3d_device *device = surface->resource.device;
5576     const struct wined3d_gl_info *gl_info = context->gl_info;
5577     GLint compare_mode = GL_NONE;
5578     struct blt_info info;
5579     GLint old_binding = 0;
5580     RECT rect;
5581
5582     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5583
5584     glDisable(GL_CULL_FACE);
5585     glDisable(GL_BLEND);
5586     glDisable(GL_ALPHA_TEST);
5587     glDisable(GL_SCISSOR_TEST);
5588     glDisable(GL_STENCIL_TEST);
5589     glEnable(GL_DEPTH_TEST);
5590     glDepthFunc(GL_ALWAYS);
5591     glDepthMask(GL_TRUE);
5592     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5593     glViewport(x, y, w, h);
5594     glDepthRange(0.0, 1.0);
5595
5596     SetRect(&rect, 0, h, w, 0);
5597     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5598     context_active_texture(context, context->gl_info, 0);
5599     glGetIntegerv(info.binding, &old_binding);
5600     glBindTexture(info.bind_target, texture);
5601     if (gl_info->supported[ARB_SHADOW])
5602     {
5603         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5604         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5605     }
5606
5607     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5608             gl_info, info.tex_type, &surface->ds_current_size);
5609
5610     glBegin(GL_TRIANGLE_STRIP);
5611     glTexCoord3fv(info.coords[0]);
5612     glVertex2f(-1.0f, -1.0f);
5613     glTexCoord3fv(info.coords[1]);
5614     glVertex2f(1.0f, -1.0f);
5615     glTexCoord3fv(info.coords[2]);
5616     glVertex2f(-1.0f, 1.0f);
5617     glTexCoord3fv(info.coords[3]);
5618     glVertex2f(1.0f, 1.0f);
5619     glEnd();
5620
5621     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5622     glBindTexture(info.bind_target, old_binding);
5623
5624     glPopAttrib();
5625
5626     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5627 }
5628
5629 void surface_modify_ds_location(struct wined3d_surface *surface,
5630         DWORD location, UINT w, UINT h)
5631 {
5632     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5633
5634     if (location & ~(SFLAG_LOCATIONS | SFLAG_DISCARDED))
5635         FIXME("Invalid location (%#x) specified.\n", location);
5636
5637     if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5638             || (!(surface->flags & SFLAG_INTEXTURE) && (location & SFLAG_INTEXTURE)))
5639     {
5640         if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5641         {
5642             TRACE("Passing to container.\n");
5643             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5644         }
5645     }
5646
5647     surface->ds_current_size.cx = w;
5648     surface->ds_current_size.cy = h;
5649     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_DISCARDED);
5650     surface->flags |= location;
5651 }
5652
5653 /* Context activation is done by the caller. */
5654 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5655 {
5656     struct wined3d_device *device = surface->resource.device;
5657     GLsizei w, h;
5658
5659     TRACE("surface %p, new location %#x.\n", surface, location);
5660
5661     /* TODO: Make this work for modes other than FBO */
5662     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5663
5664     if (!(surface->flags & location))
5665     {
5666         w = surface->ds_current_size.cx;
5667         h = surface->ds_current_size.cy;
5668         surface->ds_current_size.cx = 0;
5669         surface->ds_current_size.cy = 0;
5670     }
5671     else
5672     {
5673         w = surface->resource.width;
5674         h = surface->resource.height;
5675     }
5676
5677     if (surface->ds_current_size.cx == surface->resource.width
5678             && surface->ds_current_size.cy == surface->resource.height)
5679     {
5680         TRACE("Location (%#x) is already up to date.\n", location);
5681         return;
5682     }
5683
5684     if (surface->current_renderbuffer)
5685     {
5686         FIXME("Not supported with fixed up depth stencil.\n");
5687         return;
5688     }
5689
5690     if (surface->flags & SFLAG_DISCARDED)
5691     {
5692         TRACE("Surface was discarded, no need copy data.\n");
5693         switch (location)
5694         {
5695             case SFLAG_INTEXTURE:
5696                 surface_prepare_texture(surface, context, FALSE);
5697                 break;
5698             case SFLAG_INRB_MULTISAMPLE:
5699                 surface_prepare_rb(surface, context->gl_info, TRUE);
5700                 break;
5701             case SFLAG_INDRAWABLE:
5702                 /* Nothing to do */
5703                 break;
5704             default:
5705                 FIXME("Unhandled location %#x\n", location);
5706         }
5707         surface->flags &= ~SFLAG_DISCARDED;
5708         surface->flags |= location;
5709         surface->ds_current_size.cx = surface->resource.width;
5710         surface->ds_current_size.cy = surface->resource.height;
5711         return;
5712     }
5713
5714     if (!(surface->flags & SFLAG_LOCATIONS))
5715     {
5716         FIXME("No up to date depth stencil location.\n");
5717         surface->flags |= location;
5718         surface->ds_current_size.cx = surface->resource.width;
5719         surface->ds_current_size.cy = surface->resource.height;
5720         return;
5721     }
5722
5723     if (location == SFLAG_INTEXTURE)
5724     {
5725         GLint old_binding = 0;
5726         GLenum bind_target;
5727
5728         /* The render target is allowed to be smaller than the depth/stencil
5729          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5730          * than the offscreen surface. Don't overwrite the offscreen surface
5731          * with undefined data. */
5732         w = min(w, context->swapchain->desc.backbuffer_width);
5733         h = min(h, context->swapchain->desc.backbuffer_height);
5734
5735         TRACE("Copying onscreen depth buffer to depth texture.\n");
5736
5737         ENTER_GL();
5738
5739         if (!device->depth_blt_texture)
5740         {
5741             glGenTextures(1, &device->depth_blt_texture);
5742         }
5743
5744         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5745          * directly on the FBO texture. That's because we need to flip. */
5746         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5747                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5748         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5749         {
5750             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5751             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5752         }
5753         else
5754         {
5755             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5756             bind_target = GL_TEXTURE_2D;
5757         }
5758         glBindTexture(bind_target, device->depth_blt_texture);
5759         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5760          * internal format, because the internal format might include stencil
5761          * data. In principle we should copy stencil data as well, but unless
5762          * the driver supports stencil export it's hard to do, and doesn't
5763          * seem to be needed in practice. If the hardware doesn't support
5764          * writing stencil data, the glCopyTexImage2D() call might trigger
5765          * software fallbacks. */
5766         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5767         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5768         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5769         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5770         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5771         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5772         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5773         glBindTexture(bind_target, old_binding);
5774
5775         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5776                 NULL, surface, SFLAG_INTEXTURE);
5777         context_set_draw_buffer(context, GL_NONE);
5778         glReadBuffer(GL_NONE);
5779
5780         /* Do the actual blit */
5781         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5782         checkGLcall("depth_blt");
5783
5784         context_invalidate_state(context, STATE_FRAMEBUFFER);
5785
5786         LEAVE_GL();
5787
5788         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5789     }
5790     else if (location == SFLAG_INDRAWABLE)
5791     {
5792         TRACE("Copying depth texture to onscreen depth buffer.\n");
5793
5794         ENTER_GL();
5795
5796         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5797                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5798         surface_depth_blt(surface, context, surface->texture_name,
5799                 0, surface->pow2Height - h, w, h, surface->texture_target);
5800         checkGLcall("depth_blt");
5801
5802         context_invalidate_state(context, STATE_FRAMEBUFFER);
5803
5804         LEAVE_GL();
5805
5806         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5807     }
5808     else
5809     {
5810         ERR("Invalid location (%#x) specified.\n", location);
5811     }
5812
5813     surface->flags |= location;
5814     surface->ds_current_size.cx = surface->resource.width;
5815     surface->ds_current_size.cy = surface->resource.height;
5816 }
5817
5818 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5819 {
5820     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5821     struct wined3d_surface *overlay;
5822
5823     TRACE("surface %p, location %s, persistent %#x.\n",
5824             surface, debug_surflocation(location), persistent);
5825
5826     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5827             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5828             && (location & SFLAG_INDRAWABLE))
5829         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5830
5831     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5832             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5833         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5834
5835     if (persistent)
5836     {
5837         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5838                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5839         {
5840             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5841             {
5842                 TRACE("Passing to container.\n");
5843                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5844             }
5845         }
5846         surface->flags &= ~SFLAG_LOCATIONS;
5847         surface->flags |= location;
5848
5849         /* Redraw emulated overlays, if any */
5850         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5851         {
5852             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5853             {
5854                 surface_draw_overlay(overlay);
5855             }
5856         }
5857     }
5858     else
5859     {
5860         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5861         {
5862             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5863             {
5864                 TRACE("Passing to container\n");
5865                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5866             }
5867         }
5868         surface->flags &= ~location;
5869     }
5870
5871     if (!(surface->flags & SFLAG_LOCATIONS))
5872     {
5873         ERR("Surface %p does not have any up to date location.\n", surface);
5874     }
5875 }
5876
5877 static DWORD resource_access_from_location(DWORD location)
5878 {
5879     switch (location)
5880     {
5881         case SFLAG_INSYSMEM:
5882             return WINED3D_RESOURCE_ACCESS_CPU;
5883
5884         case SFLAG_INDRAWABLE:
5885         case SFLAG_INSRGBTEX:
5886         case SFLAG_INTEXTURE:
5887         case SFLAG_INRB_MULTISAMPLE:
5888         case SFLAG_INRB_RESOLVED:
5889             return WINED3D_RESOURCE_ACCESS_GPU;
5890
5891         default:
5892             FIXME("Unhandled location %#x.\n", location);
5893             return 0;
5894     }
5895 }
5896
5897 static void surface_load_sysmem(struct wined3d_surface *surface,
5898         const struct wined3d_gl_info *gl_info, const RECT *rect)
5899 {
5900     surface_prepare_system_memory(surface);
5901
5902     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5903         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5904
5905     /* Download the surface to system memory. */
5906     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5907     {
5908         struct wined3d_device *device = surface->resource.device;
5909         struct wined3d_context *context;
5910
5911         /* TODO: Use already acquired context when possible. */
5912         context = context_acquire(device, NULL);
5913
5914         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5915         surface_download_data(surface, gl_info);
5916
5917         context_release(context);
5918
5919         return;
5920     }
5921
5922     if (surface->flags & SFLAG_INDRAWABLE)
5923     {
5924         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5925                 wined3d_surface_get_pitch(surface));
5926         return;
5927     }
5928
5929     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5930             surface, surface->flags & SFLAG_LOCATIONS);
5931 }
5932
5933 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5934         const struct wined3d_gl_info *gl_info, const RECT *rect)
5935 {
5936     struct wined3d_device *device = surface->resource.device;
5937     enum wined3d_conversion_type convert;
5938     struct wined3d_format format;
5939     UINT byte_count;
5940     BYTE *mem;
5941
5942     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5943     {
5944         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5945         return WINED3DERR_INVALIDCALL;
5946     }
5947
5948     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5949         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5950
5951     if (surface->flags & SFLAG_INTEXTURE)
5952     {
5953         RECT r;
5954
5955         surface_get_rect(surface, rect, &r);
5956         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5957
5958         return WINED3D_OK;
5959     }
5960
5961     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5962     {
5963         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5964          * path through sysmem. */
5965         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5966     }
5967
5968     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5969
5970     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5971      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5972      * called. */
5973     if ((convert != WINED3D_CT_NONE) && (surface->flags & SFLAG_PBO))
5974     {
5975         struct wined3d_context *context;
5976
5977         TRACE("Removing the pbo attached to surface %p.\n", surface);
5978
5979         /* TODO: Use already acquired context when possible. */
5980         context = context_acquire(device, NULL);
5981
5982         surface_remove_pbo(surface, gl_info);
5983
5984         context_release(context);
5985     }
5986
5987     if ((convert != WINED3D_CT_NONE) && surface->resource.allocatedMemory)
5988     {
5989         UINT height = surface->resource.height;
5990         UINT width = surface->resource.width;
5991         UINT src_pitch, dst_pitch;
5992
5993         byte_count = format.conv_byte_count;
5994         src_pitch = wined3d_surface_get_pitch(surface);
5995
5996         /* Stick to the alignment for the converted surface too, makes it
5997          * easier to load the surface. */
5998         dst_pitch = width * byte_count;
5999         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6000
6001         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6002         {
6003             ERR("Out of memory (%u).\n", dst_pitch * height);
6004             return E_OUTOFMEMORY;
6005         }
6006
6007         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
6008                 src_pitch, width, height, dst_pitch, convert, surface);
6009
6010         surface->flags |= SFLAG_CONVERTED;
6011     }
6012     else
6013     {
6014         surface->flags &= ~SFLAG_CONVERTED;
6015         mem = surface->resource.allocatedMemory;
6016         byte_count = format.byte_count;
6017     }
6018
6019     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
6020
6021     /* Don't delete PBO memory. */
6022     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6023         HeapFree(GetProcessHeap(), 0, mem);
6024
6025     return WINED3D_OK;
6026 }
6027
6028 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6029         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6030 {
6031     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6032     struct wined3d_device *device = surface->resource.device;
6033     enum wined3d_conversion_type convert;
6034     struct wined3d_context *context;
6035     UINT width, src_pitch, dst_pitch;
6036     struct wined3d_bo_address data;
6037     struct wined3d_format format;
6038     POINT dst_point = {0, 0};
6039     BYTE *mem;
6040
6041     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6042             && surface_is_offscreen(surface)
6043             && (surface->flags & SFLAG_INDRAWABLE))
6044     {
6045         surface_load_fb_texture(surface, srgb);
6046
6047         return WINED3D_OK;
6048     }
6049
6050     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6051             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6052             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6053                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6054                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6055     {
6056         if (srgb)
6057             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6058                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6059         else
6060             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6061                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6062
6063         return WINED3D_OK;
6064     }
6065
6066     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6067             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6068             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6069                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6070                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6071     {
6072         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6073         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
6074         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6075
6076         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6077                 &rect, surface, dst_location, &rect);
6078
6079         return WINED3D_OK;
6080     }
6081
6082     /* Upload from system memory */
6083
6084     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6085             TRUE /* We will use textures */, &format, &convert);
6086
6087     if (srgb)
6088     {
6089         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6090         {
6091             /* Performance warning... */
6092             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6093             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6094         }
6095     }
6096     else
6097     {
6098         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6099         {
6100             /* Performance warning... */
6101             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6102             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6103         }
6104     }
6105
6106     if (!(surface->flags & SFLAG_INSYSMEM))
6107     {
6108         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6109         /* Lets hope we get it from somewhere... */
6110         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6111     }
6112
6113     /* TODO: Use already acquired context when possible. */
6114     context = context_acquire(device, NULL);
6115
6116     surface_prepare_texture(surface, context, srgb);
6117     surface_bind_and_dirtify(surface, context, srgb);
6118
6119     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6120     {
6121         surface->flags |= SFLAG_GLCKEY;
6122         surface->gl_color_key = surface->src_blt_color_key;
6123     }
6124     else surface->flags &= ~SFLAG_GLCKEY;
6125
6126     width = surface->resource.width;
6127     src_pitch = wined3d_surface_get_pitch(surface);
6128
6129     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6130      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6131      * called. */
6132     if ((convert != WINED3D_CT_NONE || format.convert) && (surface->flags & SFLAG_PBO))
6133     {
6134         TRACE("Removing the pbo attached to surface %p.\n", surface);
6135         surface_remove_pbo(surface, gl_info);
6136     }
6137
6138     if (format.convert)
6139     {
6140         /* This code is entered for texture formats which need a fixup. */
6141         UINT height = surface->resource.height;
6142
6143         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6144         dst_pitch = width * format.conv_byte_count;
6145         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6146
6147         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6148         {
6149             ERR("Out of memory (%u).\n", dst_pitch * height);
6150             context_release(context);
6151             return E_OUTOFMEMORY;
6152         }
6153         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6154         format.byte_count = format.conv_byte_count;
6155         src_pitch = dst_pitch;
6156     }
6157     else if (convert != WINED3D_CT_NONE && surface->resource.allocatedMemory)
6158     {
6159         /* This code is only entered for color keying fixups */
6160         UINT height = surface->resource.height;
6161
6162         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6163         dst_pitch = width * format.conv_byte_count;
6164         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6165
6166         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6167         {
6168             ERR("Out of memory (%u).\n", dst_pitch * height);
6169             context_release(context);
6170             return E_OUTOFMEMORY;
6171         }
6172         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6173                 width, height, dst_pitch, convert, surface);
6174         format.byte_count = format.conv_byte_count;
6175         src_pitch = dst_pitch;
6176     }
6177     else
6178     {
6179         mem = surface->resource.allocatedMemory;
6180     }
6181
6182     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6183     data.addr = mem;
6184     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6185
6186     context_release(context);
6187
6188     /* Don't delete PBO memory. */
6189     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6190         HeapFree(GetProcessHeap(), 0, mem);
6191
6192     return WINED3D_OK;
6193 }
6194
6195 static void surface_multisample_resolve(struct wined3d_surface *surface)
6196 {
6197     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6198
6199     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6200         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6201
6202     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6203             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6204 }
6205
6206 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6207 {
6208     struct wined3d_device *device = surface->resource.device;
6209     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6210     HRESULT hr;
6211
6212     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6213
6214     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6215     {
6216         if (location == SFLAG_INTEXTURE)
6217         {
6218             struct wined3d_context *context = context_acquire(device, NULL);
6219             surface_load_ds_location(surface, context, location);
6220             context_release(context);
6221             return WINED3D_OK;
6222         }
6223         else
6224         {
6225             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6226             return WINED3DERR_INVALIDCALL;
6227         }
6228     }
6229
6230     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6231         location = SFLAG_INTEXTURE;
6232
6233     if (surface->flags & location)
6234     {
6235         TRACE("Location already up to date.\n");
6236
6237         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6238                 && surface_need_pbo(surface, gl_info))
6239             surface_load_pbo(surface, gl_info);
6240
6241         return WINED3D_OK;
6242     }
6243
6244     if (WARN_ON(d3d_surface))
6245     {
6246         DWORD required_access = resource_access_from_location(location);
6247         if ((surface->resource.access_flags & required_access) != required_access)
6248             WARN("Operation requires %#x access, but surface only has %#x.\n",
6249                     required_access, surface->resource.access_flags);
6250     }
6251
6252     if (!(surface->flags & SFLAG_LOCATIONS))
6253     {
6254         ERR("Surface %p does not have any up to date location.\n", surface);
6255         surface->flags |= SFLAG_LOST;
6256         return WINED3DERR_DEVICELOST;
6257     }
6258
6259     switch (location)
6260     {
6261         case SFLAG_INSYSMEM:
6262             surface_load_sysmem(surface, gl_info, rect);
6263             break;
6264
6265         case SFLAG_INDRAWABLE:
6266             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6267                 return hr;
6268             break;
6269
6270         case SFLAG_INRB_RESOLVED:
6271             surface_multisample_resolve(surface);
6272             break;
6273
6274         case SFLAG_INTEXTURE:
6275         case SFLAG_INSRGBTEX:
6276             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6277                 return hr;
6278             break;
6279
6280         default:
6281             ERR("Don't know how to handle location %#x.\n", location);
6282             break;
6283     }
6284
6285     if (!rect)
6286     {
6287         surface->flags |= location;
6288
6289         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6290             surface_evict_sysmem(surface);
6291     }
6292
6293     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6294             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6295     {
6296         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6297     }
6298
6299     return WINED3D_OK;
6300 }
6301
6302 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6303 {
6304     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6305
6306     /* Not on a swapchain - must be offscreen */
6307     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6308
6309     /* The front buffer is always onscreen */
6310     if (surface == swapchain->front_buffer) return FALSE;
6311
6312     /* If the swapchain is rendered to an FBO, the backbuffer is
6313      * offscreen, otherwise onscreen */
6314     return swapchain->render_to_fbo;
6315 }
6316
6317 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Release hook of the fixed-function blitter; intentionally empty.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6320
6321 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6322 /* Context activation is done by the caller. */
6323 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6324 {
6325     BYTE table[256][4];
6326     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6327
6328     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6329
6330     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6331     ENTER_GL();
6332     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6333     LEAVE_GL();
6334 }
6335
6336 /* Context activation is done by the caller. */
6337 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6338 {
6339     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6340
6341     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6342      * else the surface is converted in software at upload time in LoadLocation.
6343      */
6344     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6345             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6346         ffp_blit_p8_upload_palette(surface, context->gl_info);
6347
6348     ENTER_GL();
6349     glEnable(surface->texture_target);
6350     checkGLcall("glEnable(surface->texture_target)");
6351     LEAVE_GL();
6352     return WINED3D_OK;
6353 }
6354
6355 /* Context activation is done by the caller. */
6356 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6357 {
6358     ENTER_GL();
6359     glDisable(GL_TEXTURE_2D);
6360     checkGLcall("glDisable(GL_TEXTURE_2D)");
6361     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6362     {
6363         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6364         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6365     }
6366     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6367     {
6368         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6369         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6370     }
6371     LEAVE_GL();
6372 }
6373
6374 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6375         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6376         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6377 {
6378     enum complex_fixup src_fixup;
6379
6380     switch (blit_op)
6381     {
6382         case WINED3D_BLIT_OP_COLOR_BLIT:
6383             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6384                 return FALSE;
6385
6386             src_fixup = get_complex_fixup(src_format->color_fixup);
6387             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6388             {
6389                 TRACE("Checking support for fixup:\n");
6390                 dump_color_fixup_desc(src_format->color_fixup);
6391             }
6392
6393             if (!is_identity_fixup(dst_format->color_fixup))
6394             {
6395                 TRACE("Destination fixups are not supported\n");
6396                 return FALSE;
6397             }
6398
6399             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6400             {
6401                 TRACE("P8 fixup supported\n");
6402                 return TRUE;
6403             }
6404
6405             /* We only support identity conversions. */
6406             if (is_identity_fixup(src_format->color_fixup))
6407             {
6408                 TRACE("[OK]\n");
6409                 return TRUE;
6410             }
6411
6412             TRACE("[FAILED]\n");
6413             return FALSE;
6414
6415         case WINED3D_BLIT_OP_COLOR_FILL:
6416             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6417                 return FALSE;
6418
6419             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6420             {
6421                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6422                     return FALSE;
6423             }
6424             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6425             {
6426                 TRACE("Color fill not supported\n");
6427                 return FALSE;
6428             }
6429
6430             /* FIXME: We should reject color fills on formats with fixups,
6431              * but this would break P8 color fills for example. */
6432
6433             return TRUE;
6434
6435         case WINED3D_BLIT_OP_DEPTH_FILL:
6436             return TRUE;
6437
6438         default:
6439             TRACE("Unsupported blit_op=%d\n", blit_op);
6440             return FALSE;
6441     }
6442 }
6443
6444 /* Do not call while under the GL lock. */
6445 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6446         const RECT *dst_rect, const struct wined3d_color *color)
6447 {
6448     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6449     struct wined3d_fb_state fb = {&dst_surface, NULL};
6450
6451     return device_clear_render_targets(device, 1, &fb,
6452             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6453 }
6454
6455 /* Do not call while under the GL lock. */
6456 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6457         struct wined3d_surface *surface, const RECT *rect, float depth)
6458 {
6459     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6460     struct wined3d_fb_state fb = {NULL, surface};
6461
6462     return device_clear_render_targets(device, 0, &fb,
6463             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6464 }
6465
6466 const struct blit_shader ffp_blit =  {
6467     ffp_blit_alloc,
6468     ffp_blit_free,
6469     ffp_blit_set,
6470     ffp_blit_unset,
6471     ffp_blit_supported,
6472     ffp_blit_color_fill,
6473     ffp_blit_depth_fill,
6474 };
6475
6476 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6477 {
6478     return WINED3D_OK;
6479 }
6480
/* Release hook of the CPU blitter; intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Nothing to tear down. */
}
6485
6486 /* Context activation is done by the caller. */
6487 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6488 {
6489     return WINED3D_OK;
6490 }
6491
/* "Unset" hook of the CPU blitter; intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* No state to restore. */
}
6496
6497 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6498         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6499         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6500 {
6501     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6502     {
6503         return TRUE;
6504     }
6505
6506     return FALSE;
6507 }
6508
6509 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6510         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6511         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6512 {
6513     UINT row_block_count;
6514     const BYTE *src_row;
6515     BYTE *dst_row;
6516     UINT x, y;
6517
6518     src_row = src_data;
6519     dst_row = dst_data;
6520
6521     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6522
6523     if (!flags)
6524     {
6525         for (y = 0; y < update_h; y += format->block_height)
6526         {
6527             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6528             src_row += src_pitch;
6529             dst_row += dst_pitch;
6530         }
6531
6532         return WINED3D_OK;
6533     }
6534
6535     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6536     {
6537         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6538
6539         switch (format->id)
6540         {
6541             case WINED3DFMT_DXT1:
6542                 for (y = 0; y < update_h; y += format->block_height)
6543                 {
6544                     struct block
6545                     {
6546                         WORD color[2];
6547                         BYTE control_row[4];
6548                     };
6549
6550                     const struct block *s = (const struct block *)src_row;
6551                     struct block *d = (struct block *)dst_row;
6552
6553                     for (x = 0; x < row_block_count; ++x)
6554                     {
6555                         d[x].color[0] = s[x].color[0];
6556                         d[x].color[1] = s[x].color[1];
6557                         d[x].control_row[0] = s[x].control_row[3];
6558                         d[x].control_row[1] = s[x].control_row[2];
6559                         d[x].control_row[2] = s[x].control_row[1];
6560                         d[x].control_row[3] = s[x].control_row[0];
6561                     }
6562                     src_row -= src_pitch;
6563                     dst_row += dst_pitch;
6564                 }
6565                 return WINED3D_OK;
6566
6567             case WINED3DFMT_DXT3:
6568                 for (y = 0; y < update_h; y += format->block_height)
6569                 {
6570                     struct block
6571                     {
6572                         WORD alpha_row[4];
6573                         WORD color[2];
6574                         BYTE control_row[4];
6575                     };
6576
6577                     const struct block *s = (const struct block *)src_row;
6578                     struct block *d = (struct block *)dst_row;
6579
6580                     for (x = 0; x < row_block_count; ++x)
6581                     {
6582                         d[x].alpha_row[0] = s[x].alpha_row[3];
6583                         d[x].alpha_row[1] = s[x].alpha_row[2];
6584                         d[x].alpha_row[2] = s[x].alpha_row[1];
6585                         d[x].alpha_row[3] = s[x].alpha_row[0];
6586                         d[x].color[0] = s[x].color[0];
6587                         d[x].color[1] = s[x].color[1];
6588                         d[x].control_row[0] = s[x].control_row[3];
6589                         d[x].control_row[1] = s[x].control_row[2];
6590                         d[x].control_row[2] = s[x].control_row[1];
6591                         d[x].control_row[3] = s[x].control_row[0];
6592                     }
6593                     src_row -= src_pitch;
6594                     dst_row += dst_pitch;
6595                 }
6596                 return WINED3D_OK;
6597
6598             default:
6599                 FIXME("Compressed flip not implemented for format %s.\n",
6600                         debug_d3dformat(format->id));
6601                 return E_NOTIMPL;
6602         }
6603     }
6604
6605     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6606             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6607
6608     return E_NOTIMPL;
6609 }
6610
6611 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6612         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6613         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6614 {
6615     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6616     const struct wined3d_format *src_format, *dst_format;
6617     struct wined3d_surface *orig_src = src_surface;
6618     struct wined3d_mapped_rect dst_map, src_map;
6619     const BYTE *sbase = NULL;
6620     HRESULT hr = WINED3D_OK;
6621     const BYTE *sbuf;
6622     BYTE *dbuf;
6623     int x, y;
6624
6625     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6626             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6627             flags, fx, debug_d3dtexturefiltertype(filter));
6628
6629     if (src_surface == dst_surface)
6630     {
6631         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6632         src_map = dst_map;
6633         src_format = dst_surface->resource.format;
6634         dst_format = src_format;
6635     }
6636     else
6637     {
6638         dst_format = dst_surface->resource.format;
6639         if (src_surface)
6640         {
6641             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6642             {
6643                 src_surface = surface_convert_format(src_surface, dst_format->id);
6644                 if (!src_surface)
6645                 {
6646                     /* The conv function writes a FIXME */
6647                     WARN("Cannot convert source surface format to dest format.\n");
6648                     goto release;
6649                 }
6650             }
6651             wined3d_surface_map(src_surface, &src_map, NULL, WINED3DLOCK_READONLY);
6652             src_format = src_surface->resource.format;
6653         }
6654         else
6655         {
6656             src_format = dst_format;
6657         }
6658
6659         wined3d_surface_map(dst_surface, &dst_map, dst_rect, 0);
6660     }
6661
6662     bpp = dst_surface->resource.format->byte_count;
6663     srcheight = src_rect->bottom - src_rect->top;
6664     srcwidth = src_rect->right - src_rect->left;
6665     dstheight = dst_rect->bottom - dst_rect->top;
6666     dstwidth = dst_rect->right - dst_rect->left;
6667     width = (dst_rect->right - dst_rect->left) * bpp;
6668
6669     if (src_surface)
6670         sbase = (BYTE *)src_map.data
6671                 + ((src_rect->top / src_format->block_height) * src_map.row_pitch)
6672                 + ((src_rect->left / src_format->block_width) * src_format->block_byte_count);
6673     if (src_surface != dst_surface)
6674         dbuf = dst_map.data;
6675     else
6676         dbuf = (BYTE *)dst_map.data
6677                 + ((dst_rect->top / dst_format->block_height) * dst_map.row_pitch)
6678                 + ((dst_rect->left / dst_format->block_width) * dst_format->block_byte_count);
6679
6680     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6681     {
6682         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6683
6684         if (src_surface == dst_surface)
6685         {
6686             FIXME("Only plain blits supported on compressed surfaces.\n");
6687             hr = E_NOTIMPL;
6688             goto release;
6689         }
6690
6691         if (srcheight != dstheight || srcwidth != dstwidth)
6692         {
6693             WARN("Stretching not supported on compressed surfaces.\n");
6694             hr = WINED3DERR_INVALIDCALL;
6695             goto release;
6696         }
6697
6698         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6699         {
6700             WARN("Rectangle not block-aligned.\n");
6701             hr = WINED3DERR_INVALIDCALL;
6702             goto release;
6703         }
6704
6705         hr = surface_cpu_blt_compressed(sbase, dbuf,
6706                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6707                 src_format, flags, fx);
6708         goto release;
6709     }
6710
6711     /* First, all the 'source-less' blits */
6712     if (flags & WINEDDBLT_COLORFILL)
6713     {
6714         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6715         flags &= ~WINEDDBLT_COLORFILL;
6716     }
6717
6718     if (flags & WINEDDBLT_DEPTHFILL)
6719     {
6720         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6721     }
6722     if (flags & WINEDDBLT_ROP)
6723     {
6724         /* Catch some degenerate cases here. */
6725         switch (fx->dwROP)
6726         {
6727             case BLACKNESS:
6728                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6729                 break;
6730             case 0xAA0029: /* No-op */
6731                 break;
6732             case WHITENESS:
6733                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6734                 break;
6735             case SRCCOPY: /* Well, we do that below? */
6736                 break;
6737             default:
6738                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6739                 goto error;
6740         }
6741         flags &= ~WINEDDBLT_ROP;
6742     }
6743     if (flags & WINEDDBLT_DDROPS)
6744     {
6745         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6746     }
6747     /* Now the 'with source' blits. */
6748     if (src_surface)
6749     {
6750         int sx, xinc, sy, yinc;
6751
6752         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6753             goto release;
6754
6755         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6756                 && (srcwidth != dstwidth || srcheight != dstheight))
6757         {
6758             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6759             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6760         }
6761
6762         xinc = (srcwidth << 16) / dstwidth;
6763         yinc = (srcheight << 16) / dstheight;
6764
6765         if (!flags)
6766         {
6767             /* No effects, we can cheat here. */
6768             if (dstwidth == srcwidth)
6769             {
6770                 if (dstheight == srcheight)
6771                 {
6772                     /* No stretching in either direction. This needs to be as
6773                      * fast as possible. */
6774                     sbuf = sbase;
6775
6776                     /* Check for overlapping surfaces. */
6777                     if (src_surface != dst_surface || dst_rect->top < src_rect->top
6778                             || dst_rect->right <= src_rect->left || src_rect->right <= dst_rect->left)
6779                     {
6780                         /* No overlap, or dst above src, so copy from top downwards. */
6781                         for (y = 0; y < dstheight; ++y)
6782                         {
6783                             memcpy(dbuf, sbuf, width);
6784                             sbuf += src_map.row_pitch;
6785                             dbuf += dst_map.row_pitch;
6786                         }
6787                     }
6788                     else if (dst_rect->top > src_rect->top)
6789                     {
6790                         /* Copy from bottom upwards. */
6791                         sbuf += src_map.row_pitch * dstheight;
6792                         dbuf += dst_map.row_pitch * dstheight;
6793                         for (y = 0; y < dstheight; ++y)
6794                         {
6795                             sbuf -= src_map.row_pitch;
6796                             dbuf -= dst_map.row_pitch;
6797                             memcpy(dbuf, sbuf, width);
6798                         }
6799                     }
6800                     else
6801                     {
6802                         /* Src and dst overlapping on the same line, use memmove. */
6803                         for (y = 0; y < dstheight; ++y)
6804                         {
6805                             memmove(dbuf, sbuf, width);
6806                             sbuf += src_map.row_pitch;
6807                             dbuf += dst_map.row_pitch;
6808                         }
6809                     }
6810                 }
6811                 else
6812                 {
6813                     /* Stretching in y direction only. */
6814                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6815                     {
6816                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6817                         memcpy(dbuf, sbuf, width);
6818                         dbuf += dst_map.row_pitch;
6819                     }
6820                 }
6821             }
6822             else
6823             {
6824                 /* Stretching in X direction. */
6825                 int last_sy = -1;
6826                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6827                 {
6828                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6829
6830                     if ((sy >> 16) == (last_sy >> 16))
6831                     {
6832                         /* This source row is the same as last source row -
6833                          * Copy the already stretched row. */
6834                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6835                     }
6836                     else
6837                     {
6838 #define STRETCH_ROW(type) \
6839 do { \
6840     const type *s = (const type *)sbuf; \
6841     type *d = (type *)dbuf; \
6842     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6843         d[x] = s[sx >> 16]; \
6844 } while(0)
6845
6846                         switch(bpp)
6847                         {
6848                             case 1:
6849                                 STRETCH_ROW(BYTE);
6850                                 break;
6851                             case 2:
6852                                 STRETCH_ROW(WORD);
6853                                 break;
6854                             case 4:
6855                                 STRETCH_ROW(DWORD);
6856                                 break;
6857                             case 3:
6858                             {
6859                                 const BYTE *s;
6860                                 BYTE *d = dbuf;
6861                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6862                                 {
6863                                     DWORD pixel;
6864
6865                                     s = sbuf + 3 * (sx >> 16);
6866                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6867                                     d[0] = (pixel      ) & 0xff;
6868                                     d[1] = (pixel >>  8) & 0xff;
6869                                     d[2] = (pixel >> 16) & 0xff;
6870                                     d += 3;
6871                                 }
6872                                 break;
6873                             }
6874                             default:
6875                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6876                                 hr = WINED3DERR_NOTAVAILABLE;
6877                                 goto error;
6878                         }
6879 #undef STRETCH_ROW
6880                     }
6881                     dbuf += dst_map.row_pitch;
6882                     last_sy = sy;
6883                 }
6884             }
6885         }
6886         else
6887         {
6888             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6889             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6890             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6891             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6892             {
6893                 /* The color keying flags are checked for correctness in ddraw */
6894                 if (flags & WINEDDBLT_KEYSRC)
6895                 {
6896                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6897                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6898                 }
6899                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6900                 {
6901                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6902                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6903                 }
6904
6905                 if (flags & WINEDDBLT_KEYDEST)
6906                 {
6907                     /* Destination color keys are taken from the source surface! */
6908                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6909                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6910                 }
6911                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6912                 {
6913                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6914                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6915                 }
6916
6917                 if (bpp == 1)
6918                 {
6919                     keymask = 0xff;
6920                 }
6921                 else
6922                 {
6923                     keymask = src_format->red_mask
6924                             | src_format->green_mask
6925                             | src_format->blue_mask;
6926                 }
6927                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6928             }
6929
6930             if (flags & WINEDDBLT_DDFX)
6931             {
6932                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6933                 LONG tmpxy;
6934                 dTopLeft     = dbuf;
6935                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6936                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6937                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6938
6939                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6940                 {
6941                     /* I don't think we need to do anything about this flag */
6942                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6943                 }
6944                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6945                 {
6946                     tmp          = dTopRight;
6947                     dTopRight    = dTopLeft;
6948                     dTopLeft     = tmp;
6949                     tmp          = dBottomRight;
6950                     dBottomRight = dBottomLeft;
6951                     dBottomLeft  = tmp;
6952                     dstxinc = dstxinc * -1;
6953                 }
6954                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6955                 {
6956                     tmp          = dTopLeft;
6957                     dTopLeft     = dBottomLeft;
6958                     dBottomLeft  = tmp;
6959                     tmp          = dTopRight;
6960                     dTopRight    = dBottomRight;
6961                     dBottomRight = tmp;
6962                     dstyinc = dstyinc * -1;
6963                 }
6964                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6965                 {
6966                     /* I don't think we need to do anything about this flag */
6967                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6968                 }
6969                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6970                 {
6971                     tmp          = dBottomRight;
6972                     dBottomRight = dTopLeft;
6973                     dTopLeft     = tmp;
6974                     tmp          = dBottomLeft;
6975                     dBottomLeft  = dTopRight;
6976                     dTopRight    = tmp;
6977                     dstxinc = dstxinc * -1;
6978                     dstyinc = dstyinc * -1;
6979                 }
6980                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6981                 {
6982                     tmp          = dTopLeft;
6983                     dTopLeft     = dBottomLeft;
6984                     dBottomLeft  = dBottomRight;
6985                     dBottomRight = dTopRight;
6986                     dTopRight    = tmp;
6987                     tmpxy   = dstxinc;
6988                     dstxinc = dstyinc;
6989                     dstyinc = tmpxy;
6990                     dstxinc = dstxinc * -1;
6991                 }
6992                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6993                 {
6994                     tmp          = dTopLeft;
6995                     dTopLeft     = dTopRight;
6996                     dTopRight    = dBottomRight;
6997                     dBottomRight = dBottomLeft;
6998                     dBottomLeft  = tmp;
6999                     tmpxy   = dstxinc;
7000                     dstxinc = dstyinc;
7001                     dstyinc = tmpxy;
7002                     dstyinc = dstyinc * -1;
7003                 }
7004                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
7005                 {
7006                     /* I don't think we need to do anything about this flag */
7007                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
7008                 }
7009                 dbuf = dTopLeft;
7010                 flags &= ~(WINEDDBLT_DDFX);
7011             }
7012
7013 #define COPY_COLORKEY_FX(type) \
7014 do { \
7015     const type *s; \
7016     type *d = (type *)dbuf, *dx, tmp; \
7017     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7018     { \
7019         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7020         dx = d; \
7021         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7022         { \
7023             tmp = s[sx >> 16]; \
7024             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7025                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7026             { \
7027                 dx[0] = tmp; \
7028             } \
7029             dx = (type *)(((BYTE *)dx) + dstxinc); \
7030         } \
7031         d = (type *)(((BYTE *)d) + dstyinc); \
7032     } \
7033 } while(0)
7034
7035             switch (bpp)
7036             {
7037                 case 1:
7038                     COPY_COLORKEY_FX(BYTE);
7039                     break;
7040                 case 2:
7041                     COPY_COLORKEY_FX(WORD);
7042                     break;
7043                 case 4:
7044                     COPY_COLORKEY_FX(DWORD);
7045                     break;
7046                 case 3:
7047                 {
7048                     const BYTE *s;
7049                     BYTE *d = dbuf, *dx;
7050                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7051                     {
7052                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7053                         dx = d;
7054                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7055                         {
7056                             DWORD pixel, dpixel = 0;
7057                             s = sbuf + 3 * (sx>>16);
7058                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7059                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7060                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7061                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7062                             {
7063                                 dx[0] = (pixel      ) & 0xff;
7064                                 dx[1] = (pixel >>  8) & 0xff;
7065                                 dx[2] = (pixel >> 16) & 0xff;
7066                             }
7067                             dx += dstxinc;
7068                         }
7069                         d += dstyinc;
7070                     }
7071                     break;
7072                 }
7073                 default:
7074                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7075                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7076                     hr = WINED3DERR_NOTAVAILABLE;
7077                     goto error;
7078 #undef COPY_COLORKEY_FX
7079             }
7080         }
7081     }
7082
7083 error:
7084     if (flags && FIXME_ON(d3d_surface))
7085     {
7086         FIXME("\tUnsupported flags: %#x.\n", flags);
7087     }
7088
7089 release:
7090     wined3d_surface_unmap(dst_surface);
7091     if (src_surface && src_surface != dst_surface)
7092         wined3d_surface_unmap(src_surface);
7093     /* Release the converted surface, if any. */
7094     if (src_surface && src_surface != orig_src)
7095         wined3d_surface_decref(src_surface);
7096
7097     return hr;
7098 }
7099
7100 /* Do not call while under the GL lock. */
7101 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7102         const RECT *dst_rect, const struct wined3d_color *color)
7103 {
7104     static const RECT src_rect;
7105     WINEDDBLTFX BltFx;
7106
7107     memset(&BltFx, 0, sizeof(BltFx));
7108     BltFx.dwSize = sizeof(BltFx);
7109     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7110     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7111             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7112 }
7113
7114 /* Do not call while under the GL lock. */
7115 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7116         struct wined3d_surface *surface, const RECT *rect, float depth)
7117 {
7118     FIXME("Depth filling not implemented by cpu_blit.\n");
7119     return WINED3DERR_INVALIDCALL;
7120 }
7121
7122 const struct blit_shader cpu_blit =  {
7123     cpu_blit_alloc,
7124     cpu_blit_free,
7125     cpu_blit_set,
7126     cpu_blit_unset,
7127     cpu_blit_supported,
7128     cpu_blit_color_fill,
7129     cpu_blit_depth_fill,
7130 };
7131
7132 static HRESULT surface_init(struct wined3d_surface *surface, enum wined3d_surface_type surface_type, UINT alignment,
7133         UINT width, UINT height, UINT level, enum wined3d_multisample_type multisample_type,
7134         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7135         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7136 {
7137     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7138     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7139     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7140     unsigned int resource_size;
7141     HRESULT hr;
7142
7143     if (multisample_quality > 0)
7144     {
7145         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7146         multisample_quality = 0;
7147     }
7148
7149     /* Quick lockable sanity check.
7150      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7151      * this function is too deep to need to care about things like this.
7152      * Levels need to be checked too, since they all affect what can be done. */
7153     switch (pool)
7154     {
7155         case WINED3D_POOL_SCRATCH:
7156             if (!lockable)
7157             {
7158                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7159                         "which are mutually exclusive, setting lockable to TRUE.\n");
7160                 lockable = TRUE;
7161             }
7162             break;
7163
7164         case WINED3D_POOL_SYSTEM_MEM:
7165             if (!lockable)
7166                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7167             break;
7168
7169         case WINED3D_POOL_MANAGED:
7170             if (usage & WINED3DUSAGE_DYNAMIC)
7171                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7172             break;
7173
7174         case WINED3D_POOL_DEFAULT:
7175             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7176                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7177             break;
7178
7179         default:
7180             FIXME("Unknown pool %#x.\n", pool);
7181             break;
7182     };
7183
7184     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7185         FIXME("Trying to create a render target that isn't in the default pool.\n");
7186
7187     /* FIXME: Check that the format is supported by the device. */
7188
7189     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7190     if (!resource_size)
7191         return WINED3DERR_INVALIDCALL;
7192
7193     surface->surface_type = surface_type;
7194
7195     switch (surface_type)
7196     {
7197         case WINED3D_SURFACE_TYPE_OPENGL:
7198             surface->surface_ops = &surface_ops;
7199             break;
7200
7201         case WINED3D_SURFACE_TYPE_GDI:
7202             surface->surface_ops = &gdi_surface_ops;
7203             break;
7204
7205         default:
7206             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7207             return WINED3DERR_INVALIDCALL;
7208     }
7209
7210     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7211             multisample_type, multisample_quality, usage, pool, width, height, 1,
7212             resource_size, parent, parent_ops, &surface_resource_ops);
7213     if (FAILED(hr))
7214     {
7215         WARN("Failed to initialize resource, returning %#x.\n", hr);
7216         return hr;
7217     }
7218
7219     /* "Standalone" surface. */
7220     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7221
7222     surface->texture_level = level;
7223     list_init(&surface->overlays);
7224
7225     /* Flags */
7226     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7227     if (flags & WINED3D_SURFACE_DISCARD)
7228         surface->flags |= SFLAG_DISCARD;
7229     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7230         surface->flags |= SFLAG_PIN_SYSMEM;
7231     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7232         surface->flags |= SFLAG_LOCKABLE;
7233     /* I'm not sure if this qualifies as a hack or as an optimization. It
7234      * seems reasonable to assume that lockable render targets will get
7235      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7236      * creation. However, the other reason we want to do this is that several
7237      * ddraw applications access surface memory while the surface isn't
7238      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7239      * future locks prevents these from crashing. */
7240     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7241         surface->flags |= SFLAG_DYNLOCK;
7242
7243     /* Mark the texture as dirty so that it gets loaded first time around. */
7244     surface_add_dirty_rect(surface, NULL);
7245     list_init(&surface->renderbuffers);
7246
7247     TRACE("surface %p, memory %p, size %u\n",
7248             surface, surface->resource.allocatedMemory, surface->resource.size);
7249
7250     /* Call the private setup routine */
7251     hr = surface->surface_ops->surface_private_setup(surface);
7252     if (FAILED(hr))
7253     {
7254         ERR("Private setup failed, returning %#x\n", hr);
7255         surface_cleanup(surface);
7256         return hr;
7257     }
7258
7259     /* Similar to lockable rendertargets above, creating the DIB section
7260      * during surface initialization prevents the sysmem pointer from changing
7261      * after a wined3d_surface_getdc() call. */
7262     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7263             && SUCCEEDED(surface_create_dib_section(surface)))
7264     {
7265         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7266         surface->resource.heapMemory = NULL;
7267         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7268     }
7269
7270     return hr;
7271 }
7272
7273 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7274         enum wined3d_format_id format_id, UINT level, DWORD usage, enum wined3d_pool pool,
7275         enum wined3d_multisample_type multisample_type, DWORD multisample_quality,
7276         enum wined3d_surface_type surface_type, DWORD flags, void *parent,
7277         const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7278 {
7279     struct wined3d_surface *object;
7280     HRESULT hr;
7281
7282     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7283             device, width, height, debug_d3dformat(format_id), level);
7284     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7285             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7286     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7287
7288     if (surface_type == WINED3D_SURFACE_TYPE_OPENGL && !device->adapter)
7289     {
7290         ERR("OpenGL surfaces are not available without OpenGL.\n");
7291         return WINED3DERR_NOTAVAILABLE;
7292     }
7293
7294     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7295     if (!object)
7296     {
7297         ERR("Failed to allocate surface memory.\n");
7298         return WINED3DERR_OUTOFVIDEOMEMORY;
7299     }
7300
7301     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7302             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7303     if (FAILED(hr))
7304     {
7305         WARN("Failed to initialize surface, returning %#x.\n", hr);
7306         HeapFree(GetProcessHeap(), 0, object);
7307         return hr;
7308     }
7309
7310     TRACE("Created surface %p.\n", object);
7311     *surface = object;
7312
7313     return hr;
7314 }