d3drm: Fix leakage of pData2 on error.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         enum wined3d_texture_filter_type filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
164 struct blt_info
165 {
166     GLenum binding;
167     GLenum bind_target;
168     enum tex_types tex_type;
169     GLfloat coords[4][3];
170 };
171
/* Rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380     UINT usage;
381     HDC dc;
382
383     TRACE("surface %p.\n", surface);
384
385     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
386     {
387         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
388         return WINED3DERR_INVALIDCALL;
389     }
390
391     switch (format->byte_count)
392     {
393         case 2:
394         case 4:
395             /* Allocate extra space to store the RGB bit masks. */
396             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
397             break;
398
399         case 3:
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
401             break;
402
403         default:
404             /* Allocate extra space for a palette. */
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
406                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
407             break;
408     }
409
410     if (!b_info)
411         return E_OUTOFMEMORY;
412
413     /* Some applications access the surface in via DWORDs, and do not take
414      * the necessary care at the end of the surface. So we need at least
415      * 4 extra bytes at the end of the surface. Check against the page size,
416      * if the last page used for the surface has at least 4 spare bytes we're
417      * safe, otherwise add an extra line to the DIB section. */
418     GetSystemInfo(&sysInfo);
419     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
420     {
421         extraline = 1;
422         TRACE("Adding an extra line to the DIB section.\n");
423     }
424
425     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
426     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
427     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
428     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
429     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
430             * wined3d_surface_get_pitch(surface);
431     b_info->bmiHeader.biPlanes = 1;
432     b_info->bmiHeader.biBitCount = format->byte_count * 8;
433
434     b_info->bmiHeader.biXPelsPerMeter = 0;
435     b_info->bmiHeader.biYPelsPerMeter = 0;
436     b_info->bmiHeader.biClrUsed = 0;
437     b_info->bmiHeader.biClrImportant = 0;
438
439     /* Get the bit masks */
440     masks = (DWORD *)b_info->bmiColors;
441     switch (surface->resource.format->id)
442     {
443         case WINED3DFMT_B8G8R8_UNORM:
444             usage = DIB_RGB_COLORS;
445             b_info->bmiHeader.biCompression = BI_RGB;
446             break;
447
448         case WINED3DFMT_B5G5R5X1_UNORM:
449         case WINED3DFMT_B5G5R5A1_UNORM:
450         case WINED3DFMT_B4G4R4A4_UNORM:
451         case WINED3DFMT_B4G4R4X4_UNORM:
452         case WINED3DFMT_B2G3R3_UNORM:
453         case WINED3DFMT_B2G3R3A8_UNORM:
454         case WINED3DFMT_R10G10B10A2_UNORM:
455         case WINED3DFMT_R8G8B8A8_UNORM:
456         case WINED3DFMT_R8G8B8X8_UNORM:
457         case WINED3DFMT_B10G10R10A2_UNORM:
458         case WINED3DFMT_B5G6R5_UNORM:
459         case WINED3DFMT_R16G16B16A16_UNORM:
460             usage = 0;
461             b_info->bmiHeader.biCompression = BI_BITFIELDS;
462             masks[0] = format->red_mask;
463             masks[1] = format->green_mask;
464             masks[2] = format->blue_mask;
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             usage = 0;
471             break;
472     }
473
474     if (!(dc = GetDC(0)))
475     {
476         HeapFree(GetProcessHeap(), 0, b_info);
477         return HRESULT_FROM_WIN32(GetLastError());
478     }
479
480     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
481             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
482             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
483     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
484     ReleaseDC(0, dc);
485
486     if (!surface->dib.DIBsection)
487     {
488         ERR("Failed to create DIB section.\n");
489         HeapFree(GetProcessHeap(), 0, b_info);
490         return HRESULT_FROM_WIN32(GetLastError());
491     }
492
493     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
494     /* Copy the existing surface to the dib section. */
495     if (surface->resource.allocatedMemory)
496     {
497         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
498                 surface->resource.height * wined3d_surface_get_pitch(surface));
499     }
500     else
501     {
502         /* This is to make maps read the GL texture although memory is allocated. */
503         surface->flags &= ~SFLAG_INSYSMEM;
504     }
505     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
506
507     HeapFree(GetProcessHeap(), 0, b_info);
508
509     /* Now allocate a DC. */
510     surface->hDC = CreateCompatibleDC(0);
511     SelectObject(surface->hDC, surface->dib.DIBsection);
512     TRACE("Using wined3d palette %p.\n", surface->palette);
513     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
514
515     surface->flags |= SFLAG_DIBSECTION;
516
517     return WINED3D_OK;
518 }
519
520 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
521 {
522     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
523         return FALSE;
524     if (!(surface->flags & SFLAG_DYNLOCK))
525         return FALSE;
526     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
527         return FALSE;
528     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
529         return FALSE;
530
531     return TRUE;
532 }
533
534 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
535 {
536     struct wined3d_context *context;
537     GLenum error;
538
539     context = context_acquire(surface->resource.device, NULL);
540     ENTER_GL();
541
542     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
543     error = glGetError();
544     if (!surface->pbo || error != GL_NO_ERROR)
545         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
546
547     TRACE("Binding PBO %u.\n", surface->pbo);
548
549     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
550     checkGLcall("glBindBufferARB");
551
552     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
553             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
554     checkGLcall("glBufferDataARB");
555
556     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
557     checkGLcall("glBindBufferARB");
558
559     /* We don't need the system memory anymore and we can't even use it for PBOs. */
560     if (!(surface->flags & SFLAG_CLIENT))
561     {
562         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
563         surface->resource.heapMemory = NULL;
564     }
565     surface->resource.allocatedMemory = NULL;
566     surface->flags |= SFLAG_PBO;
567     LEAVE_GL();
568     context_release(context);
569 }
570
571 static void surface_prepare_system_memory(struct wined3d_surface *surface)
572 {
573     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
574
575     TRACE("surface %p.\n", surface);
576
577     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
578         surface_load_pbo(surface, gl_info);
579     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
580     {
581         /* Whatever surface we have, make sure that there is memory allocated
582          * for the downloaded copy, or a PBO to map. */
583         if (!surface->resource.heapMemory)
584             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
585
586         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
587                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
588
589         if (surface->flags & SFLAG_INSYSMEM)
590             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
591     }
592 }
593
594 static void surface_evict_sysmem(struct wined3d_surface *surface)
595 {
596     if (surface->flags & SFLAG_DONOTFREE)
597         return;
598
599     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
600     surface->resource.allocatedMemory = NULL;
601     surface->resource.heapMemory = NULL;
602     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
603 }
604
605 /* Context activation is done by the caller. */
606 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
607         struct wined3d_context *context, BOOL srgb)
608 {
609     struct wined3d_device *device = surface->resource.device;
610     DWORD active_sampler;
611
612     /* We don't need a specific texture unit, but after binding the texture
613      * the current unit is dirty. Read the unit back instead of switching to
614      * 0, this avoids messing around with the state manager's GL states. The
615      * current texture unit should always be a valid one.
616      *
617      * To be more specific, this is tricky because we can implicitly be
618      * called from sampler() in state.c. This means we can't touch anything
619      * other than whatever happens to be the currently active texture, or we
620      * would risk marking already applied sampler states dirty again. */
621     active_sampler = device->rev_tex_unit_map[context->active_texture];
622
623     if (active_sampler != WINED3D_UNMAPPED_STAGE)
624         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
625     surface_bind(surface, context, srgb);
626 }
627
628 static void surface_force_reload(struct wined3d_surface *surface)
629 {
630     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
631 }
632
633 static void surface_release_client_storage(struct wined3d_surface *surface)
634 {
635     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
636
637     ENTER_GL();
638     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
639     if (surface->texture_name)
640     {
641         surface_bind_and_dirtify(surface, context, FALSE);
642         glTexImage2D(surface->texture_target, surface->texture_level,
643                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
644     }
645     if (surface->texture_name_srgb)
646     {
647         surface_bind_and_dirtify(surface, context, TRUE);
648         glTexImage2D(surface->texture_target, surface->texture_level,
649                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
650     }
651     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
652     LEAVE_GL();
653
654     context_release(context);
655
656     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
657     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
658     surface_force_reload(surface);
659 }
660
661 static HRESULT surface_private_setup(struct wined3d_surface *surface)
662 {
663     /* TODO: Check against the maximum texture sizes supported by the video card. */
664     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
665     unsigned int pow2Width, pow2Height;
666
667     TRACE("surface %p.\n", surface);
668
669     surface->texture_name = 0;
670     surface->texture_target = GL_TEXTURE_2D;
671
672     /* Non-power2 support */
673     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
674     {
675         pow2Width = surface->resource.width;
676         pow2Height = surface->resource.height;
677     }
678     else
679     {
680         /* Find the nearest pow2 match */
681         pow2Width = pow2Height = 1;
682         while (pow2Width < surface->resource.width)
683             pow2Width <<= 1;
684         while (pow2Height < surface->resource.height)
685             pow2Height <<= 1;
686     }
687     surface->pow2Width = pow2Width;
688     surface->pow2Height = pow2Height;
689
690     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
691     {
692         /* TODO: Add support for non power two compressed textures. */
693         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
694         {
695             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
696                   surface, surface->resource.width, surface->resource.height);
697             return WINED3DERR_NOTAVAILABLE;
698         }
699     }
700
701     if (pow2Width != surface->resource.width
702             || pow2Height != surface->resource.height)
703     {
704         surface->flags |= SFLAG_NONPOW2;
705     }
706
707     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
708             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
709     {
710         /* One of three options:
711          * 1: Do the same as we do with NPOT and scale the texture, (any
712          *    texture ops would require the texture to be scaled which is
713          *    potentially slow)
714          * 2: Set the texture to the maximum size (bad idea).
715          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
716          * 4: Create the surface, but allow it to be used only for DirectDraw
717          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
718          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
719          *    the render target. */
720         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
721         {
722             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
723             return WINED3DERR_NOTAVAILABLE;
724         }
725
726         /* We should never use this surface in combination with OpenGL! */
727         TRACE("Creating an oversized surface: %ux%u.\n",
728                 surface->pow2Width, surface->pow2Height);
729     }
730     else
731     {
732         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
733          * and EXT_PALETTED_TEXTURE is used in combination with texture
734          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
735          * EXT_PALETTED_TEXTURE doesn't work in combination with
736          * ARB_TEXTURE_RECTANGLE. */
737         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
738                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
739                 && gl_info->supported[EXT_PALETTED_TEXTURE]
740                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
741         {
742             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
743             surface->pow2Width = surface->resource.width;
744             surface->pow2Height = surface->resource.height;
745             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
746         }
747     }
748
749     switch (wined3d_settings.offscreen_rendering_mode)
750     {
751         case ORM_FBO:
752             surface->get_drawable_size = get_drawable_size_fbo;
753             break;
754
755         case ORM_BACKBUFFER:
756             surface->get_drawable_size = get_drawable_size_backbuffer;
757             break;
758
759         default:
760             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
761             return WINED3DERR_INVALIDCALL;
762     }
763
764     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
765         surface->flags |= SFLAG_DISCARDED;
766
767     return WINED3D_OK;
768 }
769
770 static void surface_realize_palette(struct wined3d_surface *surface)
771 {
772     struct wined3d_palette *palette = surface->palette;
773
774     TRACE("surface %p.\n", surface);
775
776     if (!palette) return;
777
778     if (surface->resource.format->id == WINED3DFMT_P8_UINT
779             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
780     {
781         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
782         {
783             /* Make sure the texture is up to date. This call doesn't do
784              * anything if the texture is already up to date. */
785             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
786
787             /* We want to force a palette refresh, so mark the drawable as not being up to date */
788             if (!surface_is_offscreen(surface))
789                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
790         }
791         else
792         {
793             if (!(surface->flags & SFLAG_INSYSMEM))
794             {
795                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
796                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
797             }
798             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
799         }
800     }
801
802     if (surface->flags & SFLAG_DIBSECTION)
803     {
804         RGBQUAD col[256];
805         unsigned int i;
806
807         TRACE("Updating the DC's palette.\n");
808
809         for (i = 0; i < 256; ++i)
810         {
811             col[i].rgbRed   = palette->palents[i].peRed;
812             col[i].rgbGreen = palette->palents[i].peGreen;
813             col[i].rgbBlue  = palette->palents[i].peBlue;
814             col[i].rgbReserved = 0;
815         }
816         SetDIBColorTable(surface->hDC, 0, 256, col);
817     }
818
819     /* Propagate the changes to the drawable when we have a palette. */
820     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
821         surface_load_location(surface, surface->draw_binding, NULL);
822 }
823
824 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
825 {
826     HRESULT hr;
827
828     /* If there's no destination surface there is nothing to do. */
829     if (!surface->overlay_dest)
830         return WINED3D_OK;
831
832     /* Blt calls ModifyLocation on the dest surface, which in turn calls
833      * DrawOverlay to update the overlay. Prevent an endless recursion. */
834     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
835         return WINED3D_OK;
836
837     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
838     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
839             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
840     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
841
842     return hr;
843 }
844
845 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
846 {
847     struct wined3d_device *device = surface->resource.device;
848     const RECT *pass_rect = rect;
849
850     TRACE("surface %p, rect %s, flags %#x.\n",
851             surface, wine_dbgstr_rect(rect), flags);
852
853     if (flags & WINED3DLOCK_DISCARD)
854     {
855         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
856         surface_prepare_system_memory(surface);
857         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
858     }
859     else
860     {
861         /* surface_load_location() does not check if the rectangle specifies
862          * the full surface. Most callers don't need that, so do it here. */
863         if (rect && !rect->top && !rect->left
864                 && rect->right == surface->resource.width
865                 && rect->bottom == surface->resource.height)
866             pass_rect = NULL;
867         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
868     }
869
870     if (surface->flags & SFLAG_PBO)
871     {
872         const struct wined3d_gl_info *gl_info;
873         struct wined3d_context *context;
874
875         context = context_acquire(device, NULL);
876         gl_info = context->gl_info;
877
878         ENTER_GL();
879         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
880         checkGLcall("glBindBufferARB");
881
882         /* This shouldn't happen but could occur if some other function
883          * didn't handle the PBO properly. */
884         if (surface->resource.allocatedMemory)
885             ERR("The surface already has PBO memory allocated.\n");
886
887         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
888         checkGLcall("glMapBufferARB");
889
890         /* Make sure the PBO isn't set anymore in order not to break non-PBO
891          * calls. */
892         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
893         checkGLcall("glBindBufferARB");
894
895         LEAVE_GL();
896         context_release(context);
897     }
898
899     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
900     {
901         if (!rect)
902             surface_add_dirty_rect(surface, NULL);
903         else
904         {
905             struct wined3d_box b;
906
907             b.left = rect->left;
908             b.top = rect->top;
909             b.right = rect->right;
910             b.bottom = rect->bottom;
911             b.front = 0;
912             b.back = 1;
913             surface_add_dirty_rect(surface, &b);
914         }
915     }
916 }
917
918 static void surface_unmap(struct wined3d_surface *surface)
919 {
920     struct wined3d_device *device = surface->resource.device;
921     BOOL fullsurface;
922
923     TRACE("surface %p.\n", surface);
924
925     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
926
927     if (surface->flags & SFLAG_PBO)
928     {
929         const struct wined3d_gl_info *gl_info;
930         struct wined3d_context *context;
931
932         TRACE("Freeing PBO memory.\n");
933
934         context = context_acquire(device, NULL);
935         gl_info = context->gl_info;
936
937         ENTER_GL();
938         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
939         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
940         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
941         checkGLcall("glUnmapBufferARB");
942         LEAVE_GL();
943         context_release(context);
944
945         surface->resource.allocatedMemory = NULL;
946     }
947
948     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
949
950     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
951     {
952         TRACE("Not dirtified, nothing to do.\n");
953         goto done;
954     }
955
956     /* FIXME: The ORM_BACKBUFFER case probably isn't needed, but who knows
957      * what obscure bugs in backbuffer ORM removing it will uncover. Also,
958      * this should only be needed for the frontbuffer, but that requires
959      * present calls to call surface_load_location() on the backbuffer.
960      * Fix both of those after 1.4. */
961     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
962             || (wined3d_settings.offscreen_rendering_mode == ORM_BACKBUFFER
963             && device->fb.render_targets && surface == device->fb.render_targets[0]))
964     {
965         if (!surface->dirtyRect.left && !surface->dirtyRect.top
966                 && surface->dirtyRect.right == surface->resource.width
967                 && surface->dirtyRect.bottom == surface->resource.height)
968         {
969             fullsurface = TRUE;
970         }
971         else
972         {
973             /* TODO: Proper partial rectangle tracking. */
974             fullsurface = FALSE;
975             surface->flags |= SFLAG_INSYSMEM;
976         }
977
978         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
979
980         /* Partial rectangle tracking is not commonly implemented, it is only
981          * done for render targets. INSYSMEM was set before to tell
982          * surface_load_location() where to read the rectangle from.
983          * Indrawable is set because all modifications from the partial
984          * sysmem copy are written back to the drawable, thus the surface is
985          * merged again in the drawable. The sysmem copy is not fully up to
986          * date because only a subrectangle was read in Map(). */
987         if (!fullsurface)
988         {
989             surface_modify_location(surface, surface->draw_binding, TRUE);
990             surface_evict_sysmem(surface);
991         }
992
993         surface->dirtyRect.left = surface->resource.width;
994         surface->dirtyRect.top = surface->resource.height;
995         surface->dirtyRect.right = 0;
996         surface->dirtyRect.bottom = 0;
997     }
998     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
999     {
1000         FIXME("Depth / stencil buffer locking is not implemented.\n");
1001     }
1002
1003 done:
1004     /* Overlays have to be redrawn manually after changes with the GL implementation */
1005     if (surface->overlay_dest)
1006         surface_draw_overlay(surface);
1007 }
1008
1009 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1010 {
1011     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1012         return FALSE;
1013     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1014         return FALSE;
1015     return TRUE;
1016 }
1017
1018 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1019         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1020 {
1021     const struct wined3d_gl_info *gl_info;
1022     struct wined3d_context *context;
1023     DWORD src_mask, dst_mask;
1024     GLbitfield gl_mask;
1025
1026     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1027             device, src_surface, wine_dbgstr_rect(src_rect),
1028             dst_surface, wine_dbgstr_rect(dst_rect));
1029
1030     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1031     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1032
1033     if (src_mask != dst_mask)
1034     {
1035         ERR("Incompatible formats %s and %s.\n",
1036                 debug_d3dformat(src_surface->resource.format->id),
1037                 debug_d3dformat(dst_surface->resource.format->id));
1038         return;
1039     }
1040
1041     if (!src_mask)
1042     {
1043         ERR("Not a depth / stencil format: %s.\n",
1044                 debug_d3dformat(src_surface->resource.format->id));
1045         return;
1046     }
1047
1048     gl_mask = 0;
1049     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1050         gl_mask |= GL_DEPTH_BUFFER_BIT;
1051     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1052         gl_mask |= GL_STENCIL_BUFFER_BIT;
1053
1054     /* Make sure the locations are up-to-date. Loading the destination
1055      * surface isn't required if the entire surface is overwritten. */
1056     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1057     if (!surface_is_full_rect(dst_surface, dst_rect))
1058         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1059
1060     context = context_acquire(device, NULL);
1061     if (!context->valid)
1062     {
1063         context_release(context);
1064         WARN("Invalid context, skipping blit.\n");
1065         return;
1066     }
1067
1068     gl_info = context->gl_info;
1069
1070     ENTER_GL();
1071
1072     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1073     glReadBuffer(GL_NONE);
1074     checkGLcall("glReadBuffer()");
1075     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1076
1077     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1078     context_set_draw_buffer(context, GL_NONE);
1079     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1080     context_invalidate_state(context, STATE_FRAMEBUFFER);
1081
1082     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1083     {
1084         glDepthMask(GL_TRUE);
1085         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1086     }
1087     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1088     {
1089         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1090         {
1091             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1092             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1093         }
1094         glStencilMask(~0U);
1095         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1096     }
1097
1098     glDisable(GL_SCISSOR_TEST);
1099     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1100
1101     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1102             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1103     checkGLcall("glBlitFramebuffer()");
1104
1105     LEAVE_GL();
1106
1107     if (wined3d_settings.strict_draw_ordering)
1108         wglFlush(); /* Flush to ensure ordering across contexts. */
1109
1110     context_release(context);
1111 }
1112
1113 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1114  * Depth / stencil is not supported. */
1115 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1116         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1117         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1118 {
1119     const struct wined3d_gl_info *gl_info;
1120     struct wined3d_context *context;
1121     RECT src_rect, dst_rect;
1122     GLenum gl_filter;
1123     GLenum buffer;
1124
1125     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1126     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1127             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1128     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1129             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1130
1131     src_rect = *src_rect_in;
1132     dst_rect = *dst_rect_in;
1133
1134     switch (filter)
1135     {
1136         case WINED3D_TEXF_LINEAR:
1137             gl_filter = GL_LINEAR;
1138             break;
1139
1140         default:
1141             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1142         case WINED3D_TEXF_NONE:
1143         case WINED3D_TEXF_POINT:
1144             gl_filter = GL_NEAREST;
1145             break;
1146     }
1147
1148     /* Resolve the source surface first if needed. */
1149     if (src_location == SFLAG_INRB_MULTISAMPLE
1150             && (src_surface->resource.format->id != dst_surface->resource.format->id
1151                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1152                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1153         src_location = SFLAG_INRB_RESOLVED;
1154
1155     /* Make sure the locations are up-to-date. Loading the destination
1156      * surface isn't required if the entire surface is overwritten. (And is
1157      * in fact harmful if we're being called by surface_load_location() with
1158      * the purpose of loading the destination surface.) */
1159     surface_load_location(src_surface, src_location, NULL);
1160     if (!surface_is_full_rect(dst_surface, &dst_rect))
1161         surface_load_location(dst_surface, dst_location, NULL);
1162
1163     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1164     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1165     else context = context_acquire(device, NULL);
1166
1167     if (!context->valid)
1168     {
1169         context_release(context);
1170         WARN("Invalid context, skipping blit.\n");
1171         return;
1172     }
1173
1174     gl_info = context->gl_info;
1175
1176     if (src_location == SFLAG_INDRAWABLE)
1177     {
1178         TRACE("Source surface %p is onscreen.\n", src_surface);
1179         buffer = surface_get_gl_buffer(src_surface);
1180         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1181     }
1182     else
1183     {
1184         TRACE("Source surface %p is offscreen.\n", src_surface);
1185         buffer = GL_COLOR_ATTACHMENT0;
1186     }
1187
1188     ENTER_GL();
1189     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1190     glReadBuffer(buffer);
1191     checkGLcall("glReadBuffer()");
1192     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1193     LEAVE_GL();
1194
1195     if (dst_location == SFLAG_INDRAWABLE)
1196     {
1197         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1198         buffer = surface_get_gl_buffer(dst_surface);
1199         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1200     }
1201     else
1202     {
1203         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1204         buffer = GL_COLOR_ATTACHMENT0;
1205     }
1206
1207     ENTER_GL();
1208     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1209     context_set_draw_buffer(context, buffer);
1210     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1211     context_invalidate_state(context, STATE_FRAMEBUFFER);
1212
1213     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1214     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1215     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1216     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1217     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1218
1219     glDisable(GL_SCISSOR_TEST);
1220     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1221
1222     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1223             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1224     checkGLcall("glBlitFramebuffer()");
1225
1226     LEAVE_GL();
1227
1228     if (wined3d_settings.strict_draw_ordering
1229             || (dst_location == SFLAG_INDRAWABLE
1230             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1231         wglFlush();
1232
1233     context_release(context);
1234 }
1235
1236 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1237         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1238         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1239 {
1240     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1241         return FALSE;
1242
1243     /* Source and/or destination need to be on the GL side */
1244     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1245         return FALSE;
1246
1247     switch (blit_op)
1248     {
1249         case WINED3D_BLIT_OP_COLOR_BLIT:
1250             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1251                 return FALSE;
1252             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1253                 return FALSE;
1254             break;
1255
1256         case WINED3D_BLIT_OP_DEPTH_BLIT:
1257             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1258                 return FALSE;
1259             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1260                 return FALSE;
1261             break;
1262
1263         default:
1264             return FALSE;
1265     }
1266
1267     if (!(src_format->id == dst_format->id
1268             || (is_identity_fixup(src_format->color_fixup)
1269             && is_identity_fixup(dst_format->color_fixup))))
1270         return FALSE;
1271
1272     return TRUE;
1273 }
1274
1275 /* This function checks if the primary render target uses the 8bit paletted format. */
1276 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1277 {
1278     if (device->fb.render_targets && device->fb.render_targets[0])
1279     {
1280         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1281         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1282                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1283             return TRUE;
1284     }
1285     return FALSE;
1286 }
1287
1288 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1289         DWORD color, struct wined3d_color *float_color)
1290 {
1291     const struct wined3d_format *format = surface->resource.format;
1292     const struct wined3d_device *device = surface->resource.device;
1293
1294     switch (format->id)
1295     {
1296         case WINED3DFMT_P8_UINT:
1297             if (surface->palette)
1298             {
1299                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1300                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1301                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1302             }
1303             else
1304             {
1305                 float_color->r = 0.0f;
1306                 float_color->g = 0.0f;
1307                 float_color->b = 0.0f;
1308             }
1309             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1310             break;
1311
1312         case WINED3DFMT_B5G6R5_UNORM:
1313             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1314             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1315             float_color->b = (color & 0x1f) / 31.0f;
1316             float_color->a = 1.0f;
1317             break;
1318
1319         case WINED3DFMT_B8G8R8_UNORM:
1320         case WINED3DFMT_B8G8R8X8_UNORM:
1321             float_color->r = D3DCOLOR_R(color);
1322             float_color->g = D3DCOLOR_G(color);
1323             float_color->b = D3DCOLOR_B(color);
1324             float_color->a = 1.0f;
1325             break;
1326
1327         case WINED3DFMT_B8G8R8A8_UNORM:
1328             float_color->r = D3DCOLOR_R(color);
1329             float_color->g = D3DCOLOR_G(color);
1330             float_color->b = D3DCOLOR_B(color);
1331             float_color->a = D3DCOLOR_A(color);
1332             break;
1333
1334         default:
1335             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1336             return FALSE;
1337     }
1338
1339     return TRUE;
1340 }
1341
1342 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1343 {
1344     const struct wined3d_format *format = surface->resource.format;
1345
1346     switch (format->id)
1347     {
1348         case WINED3DFMT_S1_UINT_D15_UNORM:
1349             *float_depth = depth / (float)0x00007fff;
1350             break;
1351
1352         case WINED3DFMT_D16_UNORM:
1353             *float_depth = depth / (float)0x0000ffff;
1354             break;
1355
1356         case WINED3DFMT_D24_UNORM_S8_UINT:
1357         case WINED3DFMT_X8D24_UNORM:
1358             *float_depth = depth / (float)0x00ffffff;
1359             break;
1360
1361         case WINED3DFMT_D32_UNORM:
1362             *float_depth = depth / (float)0xffffffff;
1363             break;
1364
1365         default:
1366             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1367             return FALSE;
1368     }
1369
1370     return TRUE;
1371 }
1372
1373 /* Do not call while under the GL lock. */
1374 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1375 {
1376     const struct wined3d_resource *resource = &surface->resource;
1377     struct wined3d_device *device = resource->device;
1378     const struct blit_shader *blitter;
1379
1380     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1381             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1382     if (!blitter)
1383     {
1384         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1385         return WINED3DERR_INVALIDCALL;
1386     }
1387
1388     return blitter->depth_fill(device, surface, rect, depth);
1389 }
1390
1391 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1392         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1393 {
1394     struct wined3d_device *device = src_surface->resource.device;
1395
1396     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1397             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1398             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1399         return WINED3DERR_INVALIDCALL;
1400
1401     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1402
1403     surface_modify_ds_location(dst_surface, SFLAG_INTEXTURE,
1404             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1405
1406     return WINED3D_OK;
1407 }
1408
1409 /* Do not call while under the GL lock. */
1410 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1411         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1412         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1413 {
1414     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1415     struct wined3d_device *device = dst_surface->resource.device;
1416     DWORD src_ds_flags, dst_ds_flags;
1417     RECT src_rect, dst_rect;
1418     BOOL scale, convert;
1419
1420     static const DWORD simple_blit = WINEDDBLT_ASYNC
1421             | WINEDDBLT_COLORFILL
1422             | WINEDDBLT_WAIT
1423             | WINEDDBLT_DEPTHFILL
1424             | WINEDDBLT_DONOTWAIT;
1425
1426     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1427             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1428             flags, fx, debug_d3dtexturefiltertype(filter));
1429     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1430
1431     if (fx)
1432     {
1433         TRACE("dwSize %#x.\n", fx->dwSize);
1434         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1435         TRACE("dwROP %#x.\n", fx->dwROP);
1436         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1437         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1438         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1439         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1440         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1441         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1442         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1443         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1444         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1445         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1446         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1447         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1448         TRACE("dwReserved %#x.\n", fx->dwReserved);
1449         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1450         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1451         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1452         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1453         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1454         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1455                 fx->ddckDestColorkey.color_space_low_value,
1456                 fx->ddckDestColorkey.color_space_high_value);
1457         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1458                 fx->ddckSrcColorkey.color_space_low_value,
1459                 fx->ddckSrcColorkey.color_space_high_value);
1460     }
1461
1462     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1463     {
1464         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1465         return WINEDDERR_SURFACEBUSY;
1466     }
1467
1468     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1469
1470     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1471             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1472             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1473             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1474             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1475     {
1476         WARN("The application gave us a bad destination rectangle.\n");
1477         return WINEDDERR_INVALIDRECT;
1478     }
1479
1480     if (src_surface)
1481     {
1482         surface_get_rect(src_surface, src_rect_in, &src_rect);
1483
1484         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1485                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1486                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1487                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1488                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1489         {
1490             WARN("Application gave us bad source rectangle for Blt.\n");
1491             return WINEDDERR_INVALIDRECT;
1492         }
1493     }
1494     else
1495     {
1496         memset(&src_rect, 0, sizeof(src_rect));
1497     }
1498
1499     if (!fx || !(fx->dwDDFX))
1500         flags &= ~WINEDDBLT_DDFX;
1501
1502     if (flags & WINEDDBLT_WAIT)
1503         flags &= ~WINEDDBLT_WAIT;
1504
1505     if (flags & WINEDDBLT_ASYNC)
1506     {
1507         static unsigned int once;
1508
1509         if (!once++)
1510             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1511         flags &= ~WINEDDBLT_ASYNC;
1512     }
1513
1514     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1515     if (flags & WINEDDBLT_DONOTWAIT)
1516     {
1517         static unsigned int once;
1518
1519         if (!once++)
1520             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1521         flags &= ~WINEDDBLT_DONOTWAIT;
1522     }
1523
1524     if (!device->d3d_initialized)
1525     {
1526         WARN("D3D not initialized, using fallback.\n");
1527         goto cpu;
1528     }
1529
1530     /* We want to avoid invalidating the sysmem location for converted
1531      * surfaces, since otherwise we'd have to convert the data back when
1532      * locking them. */
1533     if (dst_surface->flags & SFLAG_CONVERTED)
1534     {
1535         WARN("Converted surface, using CPU blit.\n");
1536         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1537     }
1538
1539     if (flags & ~simple_blit)
1540     {
1541         WARN("Using fallback for complex blit (%#x).\n", flags);
1542         goto fallback;
1543     }
1544
1545     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1546         src_swapchain = src_surface->container.u.swapchain;
1547     else
1548         src_swapchain = NULL;
1549
1550     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1551         dst_swapchain = dst_surface->container.u.swapchain;
1552     else
1553         dst_swapchain = NULL;
1554
1555     /* This isn't strictly needed. FBO blits for example could deal with
1556      * cross-swapchain blits by first downloading the source to a texture
1557      * before switching to the destination context. We just have this here to
1558      * not have to deal with the issue, since cross-swapchain blits should be
1559      * rare. */
1560     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1561     {
1562         FIXME("Using fallback for cross-swapchain blit.\n");
1563         goto fallback;
1564     }
1565
1566     scale = src_surface
1567             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1568             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1569     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1570
1571     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1572     if (src_surface)
1573         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1574     else
1575         src_ds_flags = 0;
1576
1577     if (src_ds_flags || dst_ds_flags)
1578     {
1579         if (flags & WINEDDBLT_DEPTHFILL)
1580         {
1581             float depth;
1582
1583             TRACE("Depth fill.\n");
1584
1585             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1586                 return WINED3DERR_INVALIDCALL;
1587
1588             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1589                 return WINED3D_OK;
1590         }
1591         else
1592         {
1593             if (src_ds_flags != dst_ds_flags)
1594             {
1595                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1596                 return WINED3DERR_INVALIDCALL;
1597             }
1598
1599             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1600                 return WINED3D_OK;
1601         }
1602     }
1603     else
1604     {
1605         /* In principle this would apply to depth blits as well, but we don't
1606          * implement those in the CPU blitter at the moment. */
1607         if ((dst_surface->flags & SFLAG_INSYSMEM)
1608                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1609         {
1610             if (scale)
1611                 TRACE("Not doing sysmem blit because of scaling.\n");
1612             else if (convert)
1613                 TRACE("Not doing sysmem blit because of format conversion.\n");
1614             else
1615                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1616         }
1617
1618         if (flags & WINEDDBLT_COLORFILL)
1619         {
1620             struct wined3d_color color;
1621
1622             TRACE("Color fill.\n");
1623
1624             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1625                 goto fallback;
1626
1627             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1628                 return WINED3D_OK;
1629         }
1630         else
1631         {
1632             TRACE("Color blit.\n");
1633
1634             /* Upload */
1635             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1636             {
1637                 if (scale)
1638                     TRACE("Not doing upload because of scaling.\n");
1639                 else if (convert)
1640                     TRACE("Not doing upload because of format conversion.\n");
1641                 else
1642                 {
1643                     POINT dst_point = {dst_rect.left, dst_rect.top};
1644
1645                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1646                     {
1647                         if (!surface_is_offscreen(dst_surface))
1648                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1649                         return WINED3D_OK;
1650                     }
1651                 }
1652             }
1653
1654             /* Use present for back -> front blits. The idea behind this is
1655              * that present is potentially faster than a blit, in particular
1656              * when FBO blits aren't available. Some ddraw applications like
1657              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1658              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1659              * applications can't blit directly to the frontbuffer. */
1660             if (dst_swapchain && dst_swapchain->back_buffers
1661                     && dst_surface == dst_swapchain->front_buffer
1662                     && src_surface == dst_swapchain->back_buffers[0])
1663             {
1664                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1665
1666                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1667
1668                 /* Set the swap effect to COPY, we don't want the backbuffer
1669                  * to become undefined. */
1670                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1671                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1672                 dst_swapchain->desc.swap_effect = swap_effect;
1673
1674                 return WINED3D_OK;
1675             }
1676
1677             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1678                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1679                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1680             {
1681                 TRACE("Using FBO blit.\n");
1682
1683                 surface_blt_fbo(device, filter,
1684                         src_surface, src_surface->draw_binding, &src_rect,
1685                         dst_surface, dst_surface->draw_binding, &dst_rect);
1686                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1687                 return WINED3D_OK;
1688             }
1689
1690             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1691                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1692                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1693             {
1694                 TRACE("Using arbfp blit.\n");
1695
1696                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1697                     return WINED3D_OK;
1698             }
1699         }
1700     }
1701
1702 fallback:
1703
1704     /* Special cases for render targets. */
1705     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1706             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1707     {
1708         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1709                 src_surface, &src_rect, flags, fx, filter)))
1710             return WINED3D_OK;
1711     }
1712
1713 cpu:
1714
1715     /* For the rest call the X11 surface implementation. For render targets
1716      * this should be implemented OpenGL accelerated in BltOverride, other
1717      * blits are rather rare. */
1718     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1719 }
1720
1721 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1722         struct wined3d_surface *render_target)
1723 {
1724     TRACE("surface %p, render_target %p.\n", surface, render_target);
1725
1726     /* TODO: Check surface sizes, pools, etc. */
1727
1728     if (render_target->resource.multisample_type)
1729         return WINED3DERR_INVALIDCALL;
1730
1731     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1732 }
1733
1734 /* Context activation is done by the caller. */
1735 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1736 {
1737     if (surface->flags & SFLAG_DIBSECTION)
1738     {
1739         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1740     }
1741     else
1742     {
1743         if (!surface->resource.heapMemory)
1744             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1745         else if (!(surface->flags & SFLAG_CLIENT))
1746             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1747                     surface, surface->resource.heapMemory, surface->flags);
1748
1749         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1750                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1751     }
1752
1753     ENTER_GL();
1754     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1755     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1756     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1757             surface->resource.size, surface->resource.allocatedMemory));
1758     checkGLcall("glGetBufferSubDataARB");
1759     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1760     checkGLcall("glDeleteBuffersARB");
1761     LEAVE_GL();
1762
1763     surface->pbo = 0;
1764     surface->flags &= ~SFLAG_PBO;
1765 }
1766
1767 /* Do not call while under the GL lock. */
1768 static void surface_unload(struct wined3d_resource *resource)
1769 {
1770     struct wined3d_surface *surface = surface_from_resource(resource);
1771     struct wined3d_renderbuffer_entry *entry, *entry2;
1772     struct wined3d_device *device = resource->device;
1773     const struct wined3d_gl_info *gl_info;
1774     struct wined3d_context *context;
1775
1776     TRACE("surface %p.\n", surface);
1777
1778     if (resource->pool == WINED3D_POOL_DEFAULT)
1779     {
1780         /* Default pool resources are supposed to be destroyed before Reset is called.
1781          * Implicit resources stay however. So this means we have an implicit render target
1782          * or depth stencil. The content may be destroyed, but we still have to tear down
1783          * opengl resources, so we cannot leave early.
1784          *
1785          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1786          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1787          * or the depth stencil into an FBO the texture or render buffer will be removed
1788          * and all flags get lost
1789          */
1790         if (!(surface->flags & SFLAG_PBO))
1791             surface_init_sysmem(surface);
1792         /* We also get here when the ddraw swapchain is destroyed, for example
1793          * for a mode switch. In this case this surface won't necessarily be
1794          * an implicit surface. We have to mark it lost so that the
1795          * application can restore it after the mode switch. */
1796         surface->flags |= SFLAG_LOST;
1797     }
1798     else
1799     {
1800         /* Load the surface into system memory */
1801         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1802         surface_modify_location(surface, surface->draw_binding, FALSE);
1803     }
1804     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1805     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1806     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1807
1808     context = context_acquire(device, NULL);
1809     gl_info = context->gl_info;
1810
1811     /* Destroy PBOs, but load them into real sysmem before */
1812     if (surface->flags & SFLAG_PBO)
1813         surface_remove_pbo(surface, gl_info);
1814
1815     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1816      * all application-created targets the application has to release the surface
1817      * before calling _Reset
1818      */
1819     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1820     {
1821         ENTER_GL();
1822         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1823         LEAVE_GL();
1824         list_remove(&entry->entry);
1825         HeapFree(GetProcessHeap(), 0, entry);
1826     }
1827     list_init(&surface->renderbuffers);
1828     surface->current_renderbuffer = NULL;
1829
1830     ENTER_GL();
1831
1832     /* If we're in a texture, the texture name belongs to the texture.
1833      * Otherwise, destroy it. */
1834     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1835     {
1836         glDeleteTextures(1, &surface->texture_name);
1837         surface->texture_name = 0;
1838         glDeleteTextures(1, &surface->texture_name_srgb);
1839         surface->texture_name_srgb = 0;
1840     }
1841     if (surface->rb_multisample)
1842     {
1843         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1844         surface->rb_multisample = 0;
1845     }
1846     if (surface->rb_resolved)
1847     {
1848         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1849         surface->rb_resolved = 0;
1850     }
1851
1852     LEAVE_GL();
1853
1854     context_release(context);
1855
1856     resource_unload(resource);
1857 }
1858
1859 static const struct wined3d_resource_ops surface_resource_ops =
1860 {
1861     surface_unload,
1862 };
1863
1864 static const struct wined3d_surface_ops surface_ops =
1865 {
1866     surface_private_setup,
1867     surface_realize_palette,
1868     surface_map,
1869     surface_unmap,
1870 };
1871
1872 /*****************************************************************************
1873  * Initializes the GDI surface, aka creates the DIB section we render to
1874  * The DIB section creation is done by calling GetDC, which will create the
1875  * section and releasing the dc to allow the app to use it. The dib section
1876  * will stay until the surface is released
1877  *
1878  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1879  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1880  * avoid confusion in the shared surface code.
1881  *
1882  * Returns:
1883  *  WINED3D_OK on success
1884  *  The return values of called methods on failure
1885  *
1886  *****************************************************************************/
1887 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1888 {
1889     HRESULT hr;
1890
1891     TRACE("surface %p.\n", surface);
1892
1893     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1894     {
1895         ERR("Overlays not yet supported by GDI surfaces.\n");
1896         return WINED3DERR_INVALIDCALL;
1897     }
1898
1899     /* Sysmem textures have memory already allocated - release it,
1900      * this avoids an unnecessary memcpy. */
1901     hr = surface_create_dib_section(surface);
1902     if (SUCCEEDED(hr))
1903     {
1904         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1905         surface->resource.heapMemory = NULL;
1906         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1907     }
1908
1909     /* We don't mind the nonpow2 stuff in GDI. */
1910     surface->pow2Width = surface->resource.width;
1911     surface->pow2Height = surface->resource.height;
1912
1913     return WINED3D_OK;
1914 }
1915
1916 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1917 {
1918     struct wined3d_palette *palette = surface->palette;
1919
1920     TRACE("surface %p.\n", surface);
1921
1922     if (!palette) return;
1923
1924     if (surface->flags & SFLAG_DIBSECTION)
1925     {
1926         RGBQUAD col[256];
1927         unsigned int i;
1928
1929         TRACE("Updating the DC's palette.\n");
1930
1931         for (i = 0; i < 256; ++i)
1932         {
1933             col[i].rgbRed = palette->palents[i].peRed;
1934             col[i].rgbGreen = palette->palents[i].peGreen;
1935             col[i].rgbBlue = palette->palents[i].peBlue;
1936             col[i].rgbReserved = 0;
1937         }
1938         SetDIBColorTable(surface->hDC, 0, 256, col);
1939     }
1940
1941     /* Update the image because of the palette change. Some games like e.g.
1942      * Red Alert call SetEntries a lot to implement fading. */
1943     /* Tell the swapchain to update the screen. */
1944     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1945     {
1946         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1947         if (surface == swapchain->front_buffer)
1948         {
1949             x11_copy_to_screen(swapchain, NULL);
1950         }
1951     }
1952 }
1953
1954 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1955 {
1956     TRACE("surface %p, rect %s, flags %#x.\n",
1957             surface, wine_dbgstr_rect(rect), flags);
1958
1959     if (!(surface->flags & SFLAG_DIBSECTION))
1960     {
1961         HRESULT hr;
1962
1963         /* This happens on gdi surfaces if the application set a user pointer
1964          * and resets it. Recreate the DIB section. */
1965         if (FAILED(hr = surface_create_dib_section(surface)))
1966         {
1967             ERR("Failed to create dib section, hr %#x.\n", hr);
1968             return;
1969         }
1970         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1971         surface->resource.heapMemory = NULL;
1972         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1973     }
1974 }
1975
1976 static void gdi_surface_unmap(struct wined3d_surface *surface)
1977 {
1978     TRACE("surface %p.\n", surface);
1979
1980     /* Tell the swapchain to update the screen. */
1981     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1982     {
1983         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1984         if (surface == swapchain->front_buffer)
1985         {
1986             x11_copy_to_screen(swapchain, &surface->lockedRect);
1987         }
1988     }
1989
1990     memset(&surface->lockedRect, 0, sizeof(RECT));
1991 }
1992
1993 static const struct wined3d_surface_ops gdi_surface_ops =
1994 {
1995     gdi_surface_private_setup,
1996     gdi_surface_realize_palette,
1997     gdi_surface_map,
1998     gdi_surface_unmap,
1999 };
2000
2001 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2002 {
2003     GLuint *name;
2004     DWORD flag;
2005
2006     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2007
2008     if(srgb)
2009     {
2010         name = &surface->texture_name_srgb;
2011         flag = SFLAG_INSRGBTEX;
2012     }
2013     else
2014     {
2015         name = &surface->texture_name;
2016         flag = SFLAG_INTEXTURE;
2017     }
2018
2019     if (!*name && new_name)
2020     {
2021         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2022          * surface has no texture name yet. See if we can get rid of this. */
2023         if (surface->flags & flag)
2024         {
2025             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2026             surface_modify_location(surface, flag, FALSE);
2027         }
2028     }
2029
2030     *name = new_name;
2031     surface_force_reload(surface);
2032 }
2033
2034 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2035 {
2036     TRACE("surface %p, target %#x.\n", surface, target);
2037
2038     if (surface->texture_target != target)
2039     {
2040         if (target == GL_TEXTURE_RECTANGLE_ARB)
2041         {
2042             surface->flags &= ~SFLAG_NORMCOORD;
2043         }
2044         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2045         {
2046             surface->flags |= SFLAG_NORMCOORD;
2047         }
2048     }
2049     surface->texture_target = target;
2050     surface_force_reload(surface);
2051 }
2052
2053 /* Context activation is done by the caller. */
2054 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2055 {
2056     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2057
2058     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2059     {
2060         struct wined3d_texture *texture = surface->container.u.texture;
2061
2062         TRACE("Passing to container (%p).\n", texture);
2063         texture->texture_ops->texture_bind(texture, context, srgb);
2064     }
2065     else
2066     {
2067         if (surface->texture_level)
2068         {
2069             ERR("Standalone surface %p is non-zero texture level %u.\n",
2070                     surface, surface->texture_level);
2071         }
2072
2073         if (srgb)
2074             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2075
2076         ENTER_GL();
2077
2078         if (!surface->texture_name)
2079         {
2080             glGenTextures(1, &surface->texture_name);
2081             checkGLcall("glGenTextures");
2082
2083             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2084
2085             context_bind_texture(context, surface->texture_target, surface->texture_name);
2086             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2087             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2088             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2089             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2090             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2091             checkGLcall("glTexParameteri");
2092         }
2093         else
2094         {
2095             context_bind_texture(context, surface->texture_target, surface->texture_name);
2096         }
2097
2098         LEAVE_GL();
2099     }
2100 }
2101
2102 /* This call just downloads data, the caller is responsible for binding the
2103  * correct texture. */
2104 /* Context activation is done by the caller. */
2105 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2106 {
2107     const struct wined3d_format *format = surface->resource.format;
2108
2109     /* Only support read back of converted P8 surfaces. */
2110     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2111     {
2112         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2113         return;
2114     }
2115
2116     ENTER_GL();
2117
2118     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2119     {
2120         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2121                 surface, surface->texture_level, format->glFormat, format->glType,
2122                 surface->resource.allocatedMemory);
2123
2124         if (surface->flags & SFLAG_PBO)
2125         {
2126             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2127             checkGLcall("glBindBufferARB");
2128             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2129             checkGLcall("glGetCompressedTexImageARB");
2130             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2131             checkGLcall("glBindBufferARB");
2132         }
2133         else
2134         {
2135             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2136                     surface->texture_level, surface->resource.allocatedMemory));
2137             checkGLcall("glGetCompressedTexImageARB");
2138         }
2139
2140         LEAVE_GL();
2141     }
2142     else
2143     {
2144         void *mem;
2145         GLenum gl_format = format->glFormat;
2146         GLenum gl_type = format->glType;
2147         int src_pitch = 0;
2148         int dst_pitch = 0;
2149
2150         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2151         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2152         {
2153             gl_format = GL_ALPHA;
2154             gl_type = GL_UNSIGNED_BYTE;
2155         }
2156
2157         if (surface->flags & SFLAG_NONPOW2)
2158         {
2159             unsigned char alignment = surface->resource.device->surface_alignment;
2160             src_pitch = format->byte_count * surface->pow2Width;
2161             dst_pitch = wined3d_surface_get_pitch(surface);
2162             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2163             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2164         }
2165         else
2166         {
2167             mem = surface->resource.allocatedMemory;
2168         }
2169
2170         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2171                 surface, surface->texture_level, gl_format, gl_type, mem);
2172
2173         if (surface->flags & SFLAG_PBO)
2174         {
2175             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2176             checkGLcall("glBindBufferARB");
2177
2178             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2179             checkGLcall("glGetTexImage");
2180
2181             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2182             checkGLcall("glBindBufferARB");
2183         }
2184         else
2185         {
2186             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2187             checkGLcall("glGetTexImage");
2188         }
2189         LEAVE_GL();
2190
2191         if (surface->flags & SFLAG_NONPOW2)
2192         {
2193             const BYTE *src_data;
2194             BYTE *dst_data;
2195             UINT y;
2196             /*
2197              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2198              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2199              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2200              *
2201              * We're doing this...
2202              *
2203              * instead of boxing the texture :
2204              * |<-texture width ->|  -->pow2width|   /\
2205              * |111111111111111111|              |   |
2206              * |222 Texture 222222| boxed empty  | texture height
2207              * |3333 Data 33333333|              |   |
2208              * |444444444444444444|              |   \/
2209              * -----------------------------------   |
2210              * |     boxed  empty | boxed empty  | pow2height
2211              * |                  |              |   \/
2212              * -----------------------------------
2213              *
2214              *
2215              * we're repacking the data to the expected texture width
2216              *
2217              * |<-texture width ->|  -->pow2width|   /\
2218              * |111111111111111111222222222222222|   |
2219              * |222333333333333333333444444444444| texture height
2220              * |444444                           |   |
2221              * |                                 |   \/
2222              * |                                 |   |
2223              * |            empty                | pow2height
2224              * |                                 |   \/
2225              * -----------------------------------
2226              *
2227              * == is the same as
2228              *
2229              * |<-texture width ->|    /\
2230              * |111111111111111111|
2231              * |222222222222222222|texture height
2232              * |333333333333333333|
2233              * |444444444444444444|    \/
2234              * --------------------
2235              *
2236              * this also means that any references to allocatedMemory should work with the data as if were a
2237              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2238              *
2239              * internally the texture is still stored in a boxed format so any references to textureName will
2240              * get a boxed texture with width pow2width and not a texture of width resource.width.
2241              *
2242              * Performance should not be an issue, because applications normally do not lock the surfaces when
2243              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2244              * and doesn't have to be re-read. */
2245             src_data = mem;
2246             dst_data = surface->resource.allocatedMemory;
2247             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2248             for (y = 1; y < surface->resource.height; ++y)
2249             {
2250                 /* skip the first row */
2251                 src_data += src_pitch;
2252                 dst_data += dst_pitch;
2253                 memcpy(dst_data, src_data, dst_pitch);
2254             }
2255
2256             HeapFree(GetProcessHeap(), 0, mem);
2257         }
2258     }
2259
2260     /* Surface has now been downloaded */
2261     surface->flags |= SFLAG_INSYSMEM;
2262 }
2263
2264 /* This call just uploads data, the caller is responsible for binding the
2265  * correct texture. */
2266 /* Context activation is done by the caller. */
2267 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2268         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2269         BOOL srgb, const struct wined3d_bo_address *data)
2270 {
2271     UINT update_w = src_rect->right - src_rect->left;
2272     UINT update_h = src_rect->bottom - src_rect->top;
2273
2274     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2275             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2276             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2277
2278     if (surface->flags & SFLAG_LOCKED)
2279     {
2280         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2281         surface->flags |= SFLAG_PIN_SYSMEM;
2282     }
2283
2284     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2285     {
2286         update_h *= format->height_scale.numerator;
2287         update_h /= format->height_scale.denominator;
2288     }
2289
2290     ENTER_GL();
2291
2292     if (data->buffer_object)
2293     {
2294         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2295         checkGLcall("glBindBufferARB");
2296     }
2297
2298     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2299     {
2300         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2301         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2302         const BYTE *addr = data->addr;
2303         GLenum internal;
2304
2305         addr += (src_rect->top / format->block_height) * src_pitch;
2306         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2307
2308         if (srgb)
2309             internal = format->glGammaInternal;
2310         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2311             internal = format->rtInternal;
2312         else
2313             internal = format->glInternal;
2314
2315         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2316                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2317                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2318
2319         if (row_length == src_pitch)
2320         {
2321             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2322                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2323         }
2324         else
2325         {
2326             UINT row, y;
2327
2328             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2329              * can't use the unpack row length like below. */
2330             for (row = 0, y = dst_point->y; row < row_count; ++row)
2331             {
2332                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2333                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2334                 y += format->block_height;
2335                 addr += src_pitch;
2336             }
2337         }
2338         checkGLcall("glCompressedTexSubImage2DARB");
2339     }
2340     else
2341     {
2342         const BYTE *addr = data->addr;
2343
2344         addr += src_rect->top * src_pitch;
2345         addr += src_rect->left * format->byte_count;
2346
2347         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2348                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2349                 update_w, update_h, format->glFormat, format->glType, addr);
2350
2351         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2352         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2353                 update_w, update_h, format->glFormat, format->glType, addr);
2354         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2355         checkGLcall("glTexSubImage2D");
2356     }
2357
2358     if (data->buffer_object)
2359     {
2360         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2361         checkGLcall("glBindBufferARB");
2362     }
2363
2364     LEAVE_GL();
2365
2366     if (wined3d_settings.strict_draw_ordering)
2367         wglFlush();
2368
2369     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2370     {
2371         struct wined3d_device *device = surface->resource.device;
2372         unsigned int i;
2373
2374         for (i = 0; i < device->context_count; ++i)
2375         {
2376             context_surface_update(device->contexts[i], surface);
2377         }
2378     }
2379 }
2380
2381 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2382         struct wined3d_surface *src_surface, const RECT *src_rect)
2383 {
2384     const struct wined3d_format *src_format;
2385     const struct wined3d_format *dst_format;
2386     const struct wined3d_gl_info *gl_info;
2387     enum wined3d_conversion_type convert;
2388     struct wined3d_context *context;
2389     struct wined3d_bo_address data;
2390     struct wined3d_format format;
2391     UINT update_w, update_h;
2392     UINT dst_w, dst_h;
2393     UINT src_w, src_h;
2394     UINT src_pitch;
2395     POINT p;
2396     RECT r;
2397
2398     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2399             dst_surface, wine_dbgstr_point(dst_point),
2400             src_surface, wine_dbgstr_rect(src_rect));
2401
2402     src_format = src_surface->resource.format;
2403     dst_format = dst_surface->resource.format;
2404
2405     if (src_format->id != dst_format->id)
2406     {
2407         WARN("Source and destination surfaces should have the same format.\n");
2408         return WINED3DERR_INVALIDCALL;
2409     }
2410
2411     if (!dst_point)
2412     {
2413         p.x = 0;
2414         p.y = 0;
2415         dst_point = &p;
2416     }
2417     else if (dst_point->x < 0 || dst_point->y < 0)
2418     {
2419         WARN("Invalid destination point.\n");
2420         return WINED3DERR_INVALIDCALL;
2421     }
2422
2423     if (!src_rect)
2424     {
2425         r.left = 0;
2426         r.top = 0;
2427         r.right = src_surface->resource.width;
2428         r.bottom = src_surface->resource.height;
2429         src_rect = &r;
2430     }
2431     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2432             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2433     {
2434         WARN("Invalid source rectangle.\n");
2435         return WINED3DERR_INVALIDCALL;
2436     }
2437
2438     src_w = src_surface->resource.width;
2439     src_h = src_surface->resource.height;
2440
2441     dst_w = dst_surface->resource.width;
2442     dst_h = dst_surface->resource.height;
2443
2444     update_w = src_rect->right - src_rect->left;
2445     update_h = src_rect->bottom - src_rect->top;
2446
2447     if (update_w > dst_w || dst_point->x > dst_w - update_w
2448             || update_h > dst_h || dst_point->y > dst_h - update_h)
2449     {
2450         WARN("Destination out of bounds.\n");
2451         return WINED3DERR_INVALIDCALL;
2452     }
2453
2454     /* NPOT block sizes would be silly. */
2455     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2456             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2457             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2458     {
2459         WARN("Update rect not block-aligned.\n");
2460         return WINED3DERR_INVALIDCALL;
2461     }
2462
2463     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2464     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2465     if (convert != WINED3D_CT_NONE || format.convert)
2466     {
2467         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2468         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2469     }
2470
2471     context = context_acquire(dst_surface->resource.device, NULL);
2472     gl_info = context->gl_info;
2473
2474     /* Only load the surface for partial updates. For newly allocated texture
2475      * the texture wouldn't be the current location, and we'd upload zeroes
2476      * just to overwrite them again. */
2477     if (update_w == dst_w && update_h == dst_h)
2478         surface_prepare_texture(dst_surface, context, FALSE);
2479     else
2480         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2481     surface_bind(dst_surface, context, FALSE);
2482
2483     data.buffer_object = src_surface->pbo;
2484     data.addr = src_surface->resource.allocatedMemory;
2485     src_pitch = wined3d_surface_get_pitch(src_surface);
2486
2487     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2488
2489     invalidate_active_texture(dst_surface->resource.device, context);
2490
2491     context_release(context);
2492
2493     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2494     return WINED3D_OK;
2495 }
2496
2497 /* This call just allocates the texture, the caller is responsible for binding
2498  * the correct texture. */
2499 /* Context activation is done by the caller. */
2500 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2501         const struct wined3d_format *format, BOOL srgb)
2502 {
2503     BOOL enable_client_storage = FALSE;
2504     GLsizei width = surface->pow2Width;
2505     GLsizei height = surface->pow2Height;
2506     const BYTE *mem = NULL;
2507     GLenum internal;
2508
2509     if (srgb)
2510     {
2511         internal = format->glGammaInternal;
2512     }
2513     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2514     {
2515         internal = format->rtInternal;
2516     }
2517     else
2518     {
2519         internal = format->glInternal;
2520     }
2521
2522     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2523     {
2524         height *= format->height_scale.numerator;
2525         height /= format->height_scale.denominator;
2526     }
2527
2528     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2529             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2530             internal, width, height, format->glFormat, format->glType);
2531
2532     ENTER_GL();
2533
2534     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2535     {
2536         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2537                 || !surface->resource.allocatedMemory)
2538         {
2539             /* In some cases we want to disable client storage.
2540              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2541              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2542              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2543              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2544              */
2545             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2546             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2547             surface->flags &= ~SFLAG_CLIENT;
2548             enable_client_storage = TRUE;
2549         }
2550         else
2551         {
2552             surface->flags |= SFLAG_CLIENT;
2553
2554             /* Point OpenGL to our allocated texture memory. Do not use
2555              * resource.allocatedMemory here because it might point into a
2556              * PBO. Instead use heapMemory, but get the alignment right. */
2557             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2558                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2559         }
2560     }
2561
2562     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2563     {
2564         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2565                 internal, width, height, 0, surface->resource.size, mem));
2566         checkGLcall("glCompressedTexImage2DARB");
2567     }
2568     else
2569     {
2570         glTexImage2D(surface->texture_target, surface->texture_level,
2571                 internal, width, height, 0, format->glFormat, format->glType, mem);
2572         checkGLcall("glTexImage2D");
2573     }
2574
2575     if(enable_client_storage) {
2576         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2577         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2578     }
2579     LEAVE_GL();
2580 }
2581
2582 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2583  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2584 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2585 /* GL locking is done by the caller */
2586 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2587 {
2588     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2589     struct wined3d_renderbuffer_entry *entry;
2590     GLuint renderbuffer = 0;
2591     unsigned int src_width, src_height;
2592     unsigned int width, height;
2593
2594     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2595     {
2596         width = rt->pow2Width;
2597         height = rt->pow2Height;
2598     }
2599     else
2600     {
2601         width = surface->pow2Width;
2602         height = surface->pow2Height;
2603     }
2604
2605     src_width = surface->pow2Width;
2606     src_height = surface->pow2Height;
2607
2608     /* A depth stencil smaller than the render target is not valid */
2609     if (width > src_width || height > src_height) return;
2610
2611     /* Remove any renderbuffer set if the sizes match */
2612     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2613             || (width == src_width && height == src_height))
2614     {
2615         surface->current_renderbuffer = NULL;
2616         return;
2617     }
2618
2619     /* Look if we've already got a renderbuffer of the correct dimensions */
2620     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2621     {
2622         if (entry->width == width && entry->height == height)
2623         {
2624             renderbuffer = entry->id;
2625             surface->current_renderbuffer = entry;
2626             break;
2627         }
2628     }
2629
2630     if (!renderbuffer)
2631     {
2632         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2633         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2634         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2635                 surface->resource.format->glInternal, width, height);
2636
2637         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2638         entry->width = width;
2639         entry->height = height;
2640         entry->id = renderbuffer;
2641         list_add_head(&surface->renderbuffers, &entry->entry);
2642
2643         surface->current_renderbuffer = entry;
2644     }
2645
2646     checkGLcall("set_compatible_renderbuffer");
2647 }
2648
2649 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2650 {
2651     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2652
2653     TRACE("surface %p.\n", surface);
2654
2655     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2656     {
2657         ERR("Surface %p is not on a swapchain.\n", surface);
2658         return GL_NONE;
2659     }
2660
2661     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2662     {
2663         if (swapchain->render_to_fbo)
2664         {
2665             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2666             return GL_COLOR_ATTACHMENT0;
2667         }
2668         TRACE("Returning GL_BACK\n");
2669         return GL_BACK;
2670     }
2671     else if (surface == swapchain->front_buffer)
2672     {
2673         TRACE("Returning GL_FRONT\n");
2674         return GL_FRONT;
2675     }
2676
2677     FIXME("Higher back buffer, returning GL_BACK\n");
2678     return GL_BACK;
2679 }
2680
2681 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2682 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2683 {
2684     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2685
2686     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2687         /* No partial locking for textures yet. */
2688         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2689
2690     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2691     if (dirty_rect)
2692     {
2693         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2694         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2695         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2696         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2697     }
2698     else
2699     {
2700         surface->dirtyRect.left = 0;
2701         surface->dirtyRect.top = 0;
2702         surface->dirtyRect.right = surface->resource.width;
2703         surface->dirtyRect.bottom = surface->resource.height;
2704     }
2705
2706     /* if the container is a texture then mark it dirty. */
2707     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2708     {
2709         TRACE("Passing to container.\n");
2710         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2711     }
2712 }
2713
2714 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2715 {
2716     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2717     BOOL ck_changed;
2718
2719     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2720
2721     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2722     {
2723         ERR("Not supported on scratch surfaces.\n");
2724         return WINED3DERR_INVALIDCALL;
2725     }
2726
2727     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2728
2729     /* Reload if either the texture and sysmem have different ideas about the
2730      * color key, or the actual key values changed. */
2731     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2732             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2733             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2734     {
2735         TRACE("Reloading because of color keying\n");
2736         /* To perform the color key conversion we need a sysmem copy of
2737          * the surface. Make sure we have it. */
2738
2739         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2740         /* Make sure the texture is reloaded because of the color key change,
2741          * this kills performance though :( */
2742         /* TODO: This is not necessarily needed with hw palettized texture support. */
2743         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2744         /* Switching color keying on / off may change the internal format. */
2745         if (ck_changed)
2746             surface_force_reload(surface);
2747     }
2748     else if (!(surface->flags & flag))
2749     {
2750         TRACE("Reloading because surface is dirty.\n");
2751     }
2752     else
2753     {
2754         TRACE("surface is already in texture\n");
2755         return WINED3D_OK;
2756     }
2757
2758     /* No partial locking for textures yet. */
2759     surface_load_location(surface, flag, NULL);
2760     surface_evict_sysmem(surface);
2761
2762     return WINED3D_OK;
2763 }
2764
/* Convert a 32-bit float to the bit pattern of a 16-bit half float
 * (1 sign bit, 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) is returned as 0x0000, NaN as 0x7c01 and infinities
 * as 0x7c00 / 0xfc00. Values too large for a half float become infinity,
 * values too small become denormals or zero. Rounding is to nearest, with
 * ties away from zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The absolute value is scaled into [2^10, 2^11) so that its integer
     * part is the 11 bit mantissa, including the implicit leading one. */
    const float mantissa_low = 1024.0f;  /* 2^10 */
    const float mantissa_high = 2048.0f; /* 2^11 */
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scaling by two is exact, so at most one of these loops runs. */
    while (tmp < mantissa_low)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= mantissa_high)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 mantissa bits (2047.5 -> 2048).
     * Renormalize, otherwise the carry would be masked away below and the
     * result would be off by almost a factor of two. */
    if (mantissa == 1u << 11)
    {
        mantissa >>= 1;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2827
2828 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2829 {
2830     ULONG refcount;
2831
2832     TRACE("Surface %p, container %p of type %#x.\n",
2833             surface, surface->container.u.base, surface->container.type);
2834
2835     switch (surface->container.type)
2836     {
2837         case WINED3D_CONTAINER_TEXTURE:
2838             return wined3d_texture_incref(surface->container.u.texture);
2839
2840         case WINED3D_CONTAINER_SWAPCHAIN:
2841             return wined3d_swapchain_incref(surface->container.u.swapchain);
2842
2843         default:
2844             ERR("Unhandled container type %#x.\n", surface->container.type);
2845         case WINED3D_CONTAINER_NONE:
2846             break;
2847     }
2848
2849     refcount = InterlockedIncrement(&surface->resource.ref);
2850     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2851
2852     return refcount;
2853 }
2854
2855 /* Do not call while under the GL lock. */
2856 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2857 {
2858     ULONG refcount;
2859
2860     TRACE("Surface %p, container %p of type %#x.\n",
2861             surface, surface->container.u.base, surface->container.type);
2862
2863     switch (surface->container.type)
2864     {
2865         case WINED3D_CONTAINER_TEXTURE:
2866             return wined3d_texture_decref(surface->container.u.texture);
2867
2868         case WINED3D_CONTAINER_SWAPCHAIN:
2869             return wined3d_swapchain_decref(surface->container.u.swapchain);
2870
2871         default:
2872             ERR("Unhandled container type %#x.\n", surface->container.type);
2873         case WINED3D_CONTAINER_NONE:
2874             break;
2875     }
2876
2877     refcount = InterlockedDecrement(&surface->resource.ref);
2878     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2879
2880     if (!refcount)
2881     {
2882         surface_cleanup(surface);
2883         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2884
2885         TRACE("Destroyed surface %p.\n", surface);
2886         HeapFree(GetProcessHeap(), 0, surface);
2887     }
2888
2889     return refcount;
2890 }
2891
2892 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2893 {
2894     return resource_set_priority(&surface->resource, priority);
2895 }
2896
2897 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2898 {
2899     return resource_get_priority(&surface->resource);
2900 }
2901
2902 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2903 {
2904     TRACE("surface %p.\n", surface);
2905
2906     if (!surface->resource.device->d3d_initialized)
2907     {
2908         ERR("D3D not initialized.\n");
2909         return;
2910     }
2911
2912     surface_internal_preload(surface, SRGB_ANY);
2913 }
2914
2915 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2916 {
2917     TRACE("surface %p.\n", surface);
2918
2919     return surface->resource.parent;
2920 }
2921
2922 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2923 {
2924     TRACE("surface %p.\n", surface);
2925
2926     return &surface->resource;
2927 }
2928
2929 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2930 {
2931     TRACE("surface %p, flags %#x.\n", surface, flags);
2932
2933     switch (flags)
2934     {
2935         case WINEDDGBS_CANBLT:
2936         case WINEDDGBS_ISBLTDONE:
2937             return WINED3D_OK;
2938
2939         default:
2940             return WINED3DERR_INVALIDCALL;
2941     }
2942 }
2943
2944 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2945 {
2946     TRACE("surface %p, flags %#x.\n", surface, flags);
2947
2948     /* XXX: DDERR_INVALIDSURFACETYPE */
2949
2950     switch (flags)
2951     {
2952         case WINEDDGFS_CANFLIP:
2953         case WINEDDGFS_ISFLIPDONE:
2954             return WINED3D_OK;
2955
2956         default:
2957             return WINED3DERR_INVALIDCALL;
2958     }
2959 }
2960
2961 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2962 {
2963     TRACE("surface %p.\n", surface);
2964
2965     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2966     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2967 }
2968
2969 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2970 {
2971     TRACE("surface %p.\n", surface);
2972
2973     surface->flags &= ~SFLAG_LOST;
2974     return WINED3D_OK;
2975 }
2976
2977 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2978 {
2979     TRACE("surface %p, palette %p.\n", surface, palette);
2980
2981     if (surface->palette == palette)
2982     {
2983         TRACE("Nop palette change.\n");
2984         return WINED3D_OK;
2985     }
2986
2987     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2988         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2989
2990     surface->palette = palette;
2991
2992     if (palette)
2993     {
2994         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2995             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2996
2997         surface->surface_ops->surface_realize_palette(surface);
2998     }
2999
3000     return WINED3D_OK;
3001 }
3002
3003 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3004         DWORD flags, const struct wined3d_color_key *color_key)
3005 {
3006     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3007
3008     if (flags & WINEDDCKEY_COLORSPACE)
3009     {
3010         FIXME(" colorkey value not supported (%08x) !\n", flags);
3011         return WINED3DERR_INVALIDCALL;
3012     }
3013
3014     /* Dirtify the surface, but only if a key was changed. */
3015     if (color_key)
3016     {
3017         switch (flags & ~WINEDDCKEY_COLORSPACE)
3018         {
3019             case WINEDDCKEY_DESTBLT:
3020                 surface->dst_blt_color_key = *color_key;
3021                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3022                 break;
3023
3024             case WINEDDCKEY_DESTOVERLAY:
3025                 surface->dst_overlay_color_key = *color_key;
3026                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3027                 break;
3028
3029             case WINEDDCKEY_SRCOVERLAY:
3030                 surface->src_overlay_color_key = *color_key;
3031                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3032                 break;
3033
3034             case WINEDDCKEY_SRCBLT:
3035                 surface->src_blt_color_key = *color_key;
3036                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3037                 break;
3038         }
3039     }
3040     else
3041     {
3042         switch (flags & ~WINEDDCKEY_COLORSPACE)
3043         {
3044             case WINEDDCKEY_DESTBLT:
3045                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3046                 break;
3047
3048             case WINEDDCKEY_DESTOVERLAY:
3049                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3050                 break;
3051
3052             case WINEDDCKEY_SRCOVERLAY:
3053                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3054                 break;
3055
3056             case WINEDDCKEY_SRCBLT:
3057                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3058                 break;
3059         }
3060     }
3061
3062     return WINED3D_OK;
3063 }
3064
3065 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3066 {
3067     TRACE("surface %p.\n", surface);
3068
3069     return surface->palette;
3070 }
3071
3072 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3073 {
3074     const struct wined3d_format *format = surface->resource.format;
3075     DWORD pitch;
3076
3077     TRACE("surface %p.\n", surface);
3078
3079     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3080     {
3081         /* Since compressed formats are block based, pitch means the amount of
3082          * bytes to the next row of block rather than the next row of pixels. */
3083         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3084         pitch = row_block_count * format->block_byte_count;
3085     }
3086     else
3087     {
3088         unsigned char alignment = surface->resource.device->surface_alignment;
3089         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3090         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3091     }
3092
3093     TRACE("Returning %u.\n", pitch);
3094
3095     return pitch;
3096 }
3097
3098 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3099 {
3100     TRACE("surface %p, mem %p.\n", surface, mem);
3101
3102     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3103     {
3104         WARN("Surface is locked or the DC is in use.\n");
3105         return WINED3DERR_INVALIDCALL;
3106     }
3107
3108     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3109     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3110     {
3111         ERR("Not supported on render targets.\n");
3112         return WINED3DERR_INVALIDCALL;
3113     }
3114
3115     if (mem && mem != surface->resource.allocatedMemory)
3116     {
3117         void *release = NULL;
3118
3119         /* Do I have to copy the old surface content? */
3120         if (surface->flags & SFLAG_DIBSECTION)
3121         {
3122             DeleteDC(surface->hDC);
3123             DeleteObject(surface->dib.DIBsection);
3124             surface->dib.bitmap_data = NULL;
3125             surface->resource.allocatedMemory = NULL;
3126             surface->hDC = NULL;
3127             surface->flags &= ~SFLAG_DIBSECTION;
3128         }
3129         else if (!(surface->flags & SFLAG_USERPTR))
3130         {
3131             release = surface->resource.heapMemory;
3132             surface->resource.heapMemory = NULL;
3133         }
3134         surface->resource.allocatedMemory = mem;
3135         surface->flags |= SFLAG_USERPTR;
3136
3137         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3138         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3139
3140         /* For client textures OpenGL has to be notified. */
3141         if (surface->flags & SFLAG_CLIENT)
3142             surface_release_client_storage(surface);
3143
3144         /* Now free the old memory if any. */
3145         HeapFree(GetProcessHeap(), 0, release);
3146     }
3147     else if (surface->flags & SFLAG_USERPTR)
3148     {
3149         /* HeapMemory should be NULL already. */
3150         if (surface->resource.heapMemory)
3151             ERR("User pointer surface has heap memory allocated.\n");
3152
3153         if (!mem)
3154         {
3155             surface->resource.allocatedMemory = NULL;
3156             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3157
3158             if (surface->flags & SFLAG_CLIENT)
3159                 surface_release_client_storage(surface);
3160
3161             surface_prepare_system_memory(surface);
3162         }
3163
3164         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3165     }
3166
3167     return WINED3D_OK;
3168 }
3169
3170 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3171 {
3172     LONG w, h;
3173
3174     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3175
3176     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3177     {
3178         WARN("Not an overlay surface.\n");
3179         return WINEDDERR_NOTAOVERLAYSURFACE;
3180     }
3181
3182     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3183     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3184     surface->overlay_destrect.left = x;
3185     surface->overlay_destrect.top = y;
3186     surface->overlay_destrect.right = x + w;
3187     surface->overlay_destrect.bottom = y + h;
3188
3189     surface_draw_overlay(surface);
3190
3191     return WINED3D_OK;
3192 }
3193
3194 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3195 {
3196     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3197
3198     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3199     {
3200         TRACE("Not an overlay surface.\n");
3201         return WINEDDERR_NOTAOVERLAYSURFACE;
3202     }
3203
3204     if (!surface->overlay_dest)
3205     {
3206         TRACE("Overlay not visible.\n");
3207         *x = 0;
3208         *y = 0;
3209         return WINEDDERR_OVERLAYNOTVISIBLE;
3210     }
3211
3212     *x = surface->overlay_destrect.left;
3213     *y = surface->overlay_destrect.top;
3214
3215     TRACE("Returning position %d, %d.\n", *x, *y);
3216
3217     return WINED3D_OK;
3218 }
3219
3220 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3221         DWORD flags, struct wined3d_surface *ref)
3222 {
3223     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3224
3225     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3226     {
3227         TRACE("Not an overlay surface.\n");
3228         return WINEDDERR_NOTAOVERLAYSURFACE;
3229     }
3230
3231     return WINED3D_OK;
3232 }
3233
3234 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3235         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3236 {
3237     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3238             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3239
3240     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3241     {
3242         WARN("Not an overlay surface.\n");
3243         return WINEDDERR_NOTAOVERLAYSURFACE;
3244     }
3245     else if (!dst_surface)
3246     {
3247         WARN("Dest surface is NULL.\n");
3248         return WINED3DERR_INVALIDCALL;
3249     }
3250
3251     if (src_rect)
3252     {
3253         surface->overlay_srcrect = *src_rect;
3254     }
3255     else
3256     {
3257         surface->overlay_srcrect.left = 0;
3258         surface->overlay_srcrect.top = 0;
3259         surface->overlay_srcrect.right = surface->resource.width;
3260         surface->overlay_srcrect.bottom = surface->resource.height;
3261     }
3262
3263     if (dst_rect)
3264     {
3265         surface->overlay_destrect = *dst_rect;
3266     }
3267     else
3268     {
3269         surface->overlay_destrect.left = 0;
3270         surface->overlay_destrect.top = 0;
3271         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3272         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3273     }
3274
3275     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3276     {
3277         surface->overlay_dest = NULL;
3278         list_remove(&surface->overlay_entry);
3279     }
3280
3281     if (flags & WINEDDOVER_SHOW)
3282     {
3283         if (surface->overlay_dest != dst_surface)
3284         {
3285             surface->overlay_dest = dst_surface;
3286             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3287         }
3288     }
3289     else if (flags & WINEDDOVER_HIDE)
3290     {
3291         /* tests show that the rectangles are erased on hide */
3292         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3293         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3294         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3295         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3296         surface->overlay_dest = NULL;
3297     }
3298
3299     surface_draw_overlay(surface);
3300
3301     return WINED3D_OK;
3302 }
3303
3304 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3305         UINT width, UINT height, enum wined3d_format_id format_id,
3306         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3307 {
3308     struct wined3d_device *device = surface->resource.device;
3309     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3310     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3311     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3312
3313     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3314             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3315
3316     if (!resource_size)
3317         return WINED3DERR_INVALIDCALL;
3318
3319     if (device->d3d_initialized)
3320         surface->resource.resource_ops->resource_unload(&surface->resource);
3321
3322     if (surface->flags & SFLAG_DIBSECTION)
3323     {
3324         DeleteDC(surface->hDC);
3325         DeleteObject(surface->dib.DIBsection);
3326         surface->dib.bitmap_data = NULL;
3327         surface->flags &= ~SFLAG_DIBSECTION;
3328     }
3329
3330     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3331     surface->resource.allocatedMemory = NULL;
3332     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3333     surface->resource.heapMemory = NULL;
3334
3335     surface->resource.width = width;
3336     surface->resource.height = height;
3337     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3338             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3339     {
3340         surface->pow2Width = width;
3341         surface->pow2Height = height;
3342     }
3343     else
3344     {
3345         surface->pow2Width = surface->pow2Height = 1;
3346         while (surface->pow2Width < width)
3347             surface->pow2Width <<= 1;
3348         while (surface->pow2Height < height)
3349             surface->pow2Height <<= 1;
3350     }
3351
3352     if (surface->pow2Width != width || surface->pow2Height != height)
3353         surface->flags |= SFLAG_NONPOW2;
3354     else
3355         surface->flags &= ~SFLAG_NONPOW2;
3356
3357     surface->resource.format = format;
3358     surface->resource.multisample_type = multisample_type;
3359     surface->resource.multisample_quality = multisample_quality;
3360     surface->resource.size = resource_size;
3361
3362     if (!surface_init_sysmem(surface))
3363         return E_OUTOFMEMORY;
3364
3365     return WINED3D_OK;
3366 }
3367
3368 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3369         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3370 {
3371     unsigned short *dst_s;
3372     const float *src_f;
3373     unsigned int x, y;
3374
3375     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3376
3377     for (y = 0; y < h; ++y)
3378     {
3379         src_f = (const float *)(src + y * pitch_in);
3380         dst_s = (unsigned short *) (dst + y * pitch_out);
3381         for (x = 0; x < w; ++x)
3382         {
3383             dst_s[x] = float_32_to_16(src_f + x);
3384         }
3385     }
3386 }
3387
3388 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3389         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3390 {
3391     static const unsigned char convert_5to8[] =
3392     {
3393         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3394         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3395         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3396         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3397     };
3398     static const unsigned char convert_6to8[] =
3399     {
3400         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3401         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3402         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3403         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3404         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3405         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3406         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3407         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3408     };
3409     unsigned int x, y;
3410
3411     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3412
3413     for (y = 0; y < h; ++y)
3414     {
3415         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3416         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3417         for (x = 0; x < w; ++x)
3418         {
3419             WORD pixel = src_line[x];
3420             dst_line[x] = 0xff000000
3421                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3422                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3423                     | convert_5to8[(pixel & 0x001f)];
3424         }
3425     }
3426 }
3427
3428 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3429  * in both cases we're just setting the X / Alpha channel to 0xff. */
3430 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3431         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3432 {
3433     unsigned int x, y;
3434
3435     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3436
3437     for (y = 0; y < h; ++y)
3438     {
3439         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3440         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3441
3442         for (x = 0; x < w; ++x)
3443         {
3444             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3445         }
3446     }
3447 }
3448
3449 static inline BYTE cliptobyte(int x)
3450 {
3451     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3452 }
3453
3454 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3455         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3456 {
3457     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3458     unsigned int x, y;
3459
3460     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3461
3462     for (y = 0; y < h; ++y)
3463     {
3464         const BYTE *src_line = src + y * pitch_in;
3465         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3466         for (x = 0; x < w; ++x)
3467         {
3468             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3469              *     C = Y - 16; D = U - 128; E = V - 128;
3470              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3471              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3472              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3473              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3474              * U and V are shared between the pixels. */
3475             if (!(x & 1)) /* For every even pixel, read new U and V. */
3476             {
3477                 d = (int) src_line[1] - 128;
3478                 e = (int) src_line[3] - 128;
3479                 r2 = 409 * e + 128;
3480                 g2 = - 100 * d - 208 * e + 128;
3481                 b2 = 516 * d + 128;
3482             }
3483             c2 = 298 * ((int) src_line[0] - 16);
3484             dst_line[x] = 0xff000000
3485                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3486                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3487                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3488                 /* Scale RGB values to 0..255 range,
3489                  * then clip them if still not in range (may be negative),
3490                  * then shift them within DWORD if necessary. */
3491             src_line += 2;
3492         }
3493     }
3494 }
3495
3496 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3497         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3498 {
3499     unsigned int x, y;
3500     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3501
3502     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3503
3504     for (y = 0; y < h; ++y)
3505     {
3506         const BYTE *src_line = src + y * pitch_in;
3507         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3508         for (x = 0; x < w; ++x)
3509         {
3510             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3511              *     C = Y - 16; D = U - 128; E = V - 128;
3512              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3513              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3514              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3515              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3516              * U and V are shared between the pixels. */
3517             if (!(x & 1)) /* For every even pixel, read new U and V. */
3518             {
3519                 d = (int) src_line[1] - 128;
3520                 e = (int) src_line[3] - 128;
3521                 r2 = 409 * e + 128;
3522                 g2 = - 100 * d - 208 * e + 128;
3523                 b2 = 516 * d + 128;
3524             }
3525             c2 = 298 * ((int) src_line[0] - 16);
3526             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3527                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3528                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3529                 /* Scale RGB values to 0..255 range,
3530                  * then clip them if still not in range (may be negative),
3531                  * then shift them within DWORD if necessary. */
3532             src_line += 2;
3533         }
3534     }
3535 }
3536
3537 struct d3dfmt_convertor_desc
3538 {
3539     enum wined3d_format_id from, to;
3540     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3541 };
3542
3543 static const struct d3dfmt_convertor_desc convertors[] =
3544 {
3545     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3546     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3547     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3548     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3549     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3550     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3551 };
3552
3553 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3554         enum wined3d_format_id to)
3555 {
3556     unsigned int i;
3557
3558     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3559     {
3560         if (convertors[i].from == from && convertors[i].to == to)
3561             return &convertors[i];
3562     }
3563
3564     return NULL;
3565 }
3566
3567 /*****************************************************************************
3568  * surface_convert_format
3569  *
3570  * Creates a duplicate of a surface in a different format. Is used by Blt to
3571  * blit between surfaces with different formats.
3572  *
3573  * Parameters
3574  *  source: Source surface
3575  *  fmt: Requested destination format
3576  *
3577  *****************************************************************************/
3578 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3579 {
3580     struct wined3d_mapped_rect src_map, dst_map;
3581     const struct d3dfmt_convertor_desc *conv;
3582     struct wined3d_surface *ret = NULL;
3583     HRESULT hr;
3584
3585     conv = find_convertor(source->resource.format->id, to_fmt);
3586     if (!conv)
3587     {
3588         FIXME("Cannot find a conversion function from format %s to %s.\n",
3589                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3590         return NULL;
3591     }
3592
3593     wined3d_surface_create(source->resource.device, source->resource.width,
3594             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3D_POOL_SCRATCH,
3595             WINED3D_MULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3596             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3597             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3598     if (!ret)
3599     {
3600         ERR("Failed to create a destination surface for conversion.\n");
3601         return NULL;
3602     }
3603
3604     memset(&src_map, 0, sizeof(src_map));
3605     memset(&dst_map, 0, sizeof(dst_map));
3606
3607     hr = wined3d_surface_map(source, &src_map, NULL, WINED3DLOCK_READONLY);
3608     if (FAILED(hr))
3609     {
3610         ERR("Failed to lock the source surface.\n");
3611         wined3d_surface_decref(ret);
3612         return NULL;
3613     }
3614     hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3DLOCK_READONLY);
3615     if (FAILED(hr))
3616     {
3617         ERR("Failed to lock the destination surface.\n");
3618         wined3d_surface_unmap(source);
3619         wined3d_surface_decref(ret);
3620         return NULL;
3621     }
3622
3623     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3624             source->resource.width, source->resource.height);
3625
3626     wined3d_surface_unmap(ret);
3627     wined3d_surface_unmap(source);
3628
3629     return ret;
3630 }
3631
3632 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3633         unsigned int bpp, UINT pitch, DWORD color)
3634 {
3635     BYTE *first;
3636     int x, y;
3637
3638     /* Do first row */
3639
3640 #define COLORFILL_ROW(type) \
3641 do { \
3642     type *d = (type *)buf; \
3643     for (x = 0; x < width; ++x) \
3644         d[x] = (type)color; \
3645 } while(0)
3646
3647     switch (bpp)
3648     {
3649         case 1:
3650             COLORFILL_ROW(BYTE);
3651             break;
3652
3653         case 2:
3654             COLORFILL_ROW(WORD);
3655             break;
3656
3657         case 3:
3658         {
3659             BYTE *d = buf;
3660             for (x = 0; x < width; ++x, d += 3)
3661             {
3662                 d[0] = (color      ) & 0xFF;
3663                 d[1] = (color >>  8) & 0xFF;
3664                 d[2] = (color >> 16) & 0xFF;
3665             }
3666             break;
3667         }
3668         case 4:
3669             COLORFILL_ROW(DWORD);
3670             break;
3671
3672         default:
3673             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3674             return WINED3DERR_NOTAVAILABLE;
3675     }
3676
3677 #undef COLORFILL_ROW
3678
3679     /* Now copy first row. */
3680     first = buf;
3681     for (y = 1; y < height; ++y)
3682     {
3683         buf += pitch;
3684         memcpy(buf, first, width * bpp);
3685     }
3686
3687     return WINED3D_OK;
3688 }
3689
3690 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3691 {
3692     TRACE("surface %p.\n", surface);
3693
3694     if (!(surface->flags & SFLAG_LOCKED))
3695     {
3696         WARN("Trying to unmap unmapped surface.\n");
3697         return WINEDDERR_NOTLOCKED;
3698     }
3699     surface->flags &= ~SFLAG_LOCKED;
3700
3701     surface->surface_ops->surface_unmap(surface);
3702
3703     return WINED3D_OK;
3704 }
3705
3706 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3707         struct wined3d_mapped_rect *mapped_rect, const RECT *rect, DWORD flags)
3708 {
3709     const struct wined3d_format *format = surface->resource.format;
3710
3711     TRACE("surface %p, mapped_rect %p, rect %s, flags %#x.\n",
3712             surface, mapped_rect, wine_dbgstr_rect(rect), flags);
3713
3714     if (surface->flags & SFLAG_LOCKED)
3715     {
3716         WARN("Surface is already mapped.\n");
3717         return WINED3DERR_INVALIDCALL;
3718     }
3719     if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
3720             && rect && (rect->left || rect->top
3721             || rect->right != surface->resource.width
3722             || rect->bottom != surface->resource.height))
3723     {
3724         UINT width_mask = format->block_width - 1;
3725         UINT height_mask = format->block_height - 1;
3726
3727         if ((rect->left & width_mask) || (rect->right & width_mask)
3728                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3729         {
3730             WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3731                     wine_dbgstr_rect(rect), format->block_width, format->block_height);
3732
3733             if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3734                 return WINED3DERR_INVALIDCALL;
3735         }
3736     }
3737
3738     surface->flags |= SFLAG_LOCKED;
3739
3740     if (!(surface->flags & SFLAG_LOCKABLE))
3741         WARN("Trying to lock unlockable surface.\n");
3742
3743     /* Performance optimization: Count how often a surface is mapped, if it is
3744      * mapped regularly do not throw away the system memory copy. This avoids
3745      * the need to download the surface from OpenGL all the time. The surface
3746      * is still downloaded if the OpenGL texture is changed. */
3747     if (!(surface->flags & SFLAG_DYNLOCK))
3748     {
3749         if (++surface->lockCount > MAXLOCKCOUNT)
3750         {
3751             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3752             surface->flags |= SFLAG_DYNLOCK;
3753         }
3754     }
3755
3756     surface->surface_ops->surface_map(surface, rect, flags);
3757
3758     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3759         mapped_rect->row_pitch = surface->resource.width * format->byte_count;
3760     else
3761         mapped_rect->row_pitch = wined3d_surface_get_pitch(surface);
3762
3763     if (!rect)
3764     {
3765         mapped_rect->data = surface->resource.allocatedMemory;
3766         surface->lockedRect.left = 0;
3767         surface->lockedRect.top = 0;
3768         surface->lockedRect.right = surface->resource.width;
3769         surface->lockedRect.bottom = surface->resource.height;
3770     }
3771     else
3772     {
3773         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3774         {
3775             /* Compressed textures are block based, so calculate the offset of
3776              * the block that contains the top-left pixel of the locked rectangle. */
3777             mapped_rect->data = surface->resource.allocatedMemory
3778                     + ((rect->top / format->block_height) * mapped_rect->row_pitch)
3779                     + ((rect->left / format->block_width) * format->block_byte_count);
3780         }
3781         else
3782         {
3783             mapped_rect->data = surface->resource.allocatedMemory
3784                     + (mapped_rect->row_pitch * rect->top)
3785                     + (rect->left * format->byte_count);
3786         }
3787         surface->lockedRect.left = rect->left;
3788         surface->lockedRect.top = rect->top;
3789         surface->lockedRect.right = rect->right;
3790         surface->lockedRect.bottom = rect->bottom;
3791     }
3792
3793     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3794     TRACE("Returning memory %p, pitch %u.\n", mapped_rect->data, mapped_rect->row_pitch);
3795
3796     return WINED3D_OK;
3797 }
3798
3799 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3800 {
3801     struct wined3d_mapped_rect map;
3802     HRESULT hr;
3803
3804     TRACE("surface %p, dc %p.\n", surface, dc);
3805
3806     if (surface->flags & SFLAG_USERPTR)
3807     {
3808         ERR("Not supported on surfaces with application-provided memory.\n");
3809         return WINEDDERR_NODC;
3810     }
3811
3812     /* Give more detailed info for ddraw. */
3813     if (surface->flags & SFLAG_DCINUSE)
3814         return WINEDDERR_DCALREADYCREATED;
3815
3816     /* Can't GetDC if the surface is locked. */
3817     if (surface->flags & SFLAG_LOCKED)
3818         return WINED3DERR_INVALIDCALL;
3819
3820     /* Create a DIB section if there isn't a dc yet. */
3821     if (!surface->hDC)
3822     {
3823         if (surface->flags & SFLAG_CLIENT)
3824         {
3825             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3826             surface_release_client_storage(surface);
3827         }
3828         hr = surface_create_dib_section(surface);
3829         if (FAILED(hr))
3830             return WINED3DERR_INVALIDCALL;
3831
3832         /* Use the DIB section from now on if we are not using a PBO. */
3833         if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
3834         {
3835             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3836             surface->resource.heapMemory = NULL;
3837             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3838         }
3839     }
3840
3841     /* Map the surface. */
3842     hr = wined3d_surface_map(surface, &map, NULL, 0);
3843     if (FAILED(hr))
3844     {
3845         ERR("Map failed, hr %#x.\n", hr);
3846         return hr;
3847     }
3848
3849     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3850      * activates the allocatedMemory. */
3851     if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
3852         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3853
3854     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3855             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3856     {
3857         /* GetDC on palettized formats is unsupported in D3D9, and the method
3858          * is missing in D3D8, so this should only be used for DX <=7
3859          * surfaces (with non-device palettes). */
3860         const PALETTEENTRY *pal = NULL;
3861
3862         if (surface->palette)
3863         {
3864             pal = surface->palette->palents;
3865         }
3866         else
3867         {
3868             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3869             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3870
3871             if (dds_primary && dds_primary->palette)
3872                 pal = dds_primary->palette->palents;
3873         }
3874
3875         if (pal)
3876         {
3877             RGBQUAD col[256];
3878             unsigned int i;
3879
3880             for (i = 0; i < 256; ++i)
3881             {
3882                 col[i].rgbRed = pal[i].peRed;
3883                 col[i].rgbGreen = pal[i].peGreen;
3884                 col[i].rgbBlue = pal[i].peBlue;
3885                 col[i].rgbReserved = 0;
3886             }
3887             SetDIBColorTable(surface->hDC, 0, 256, col);
3888         }
3889     }
3890
3891     surface->flags |= SFLAG_DCINUSE;
3892
3893     *dc = surface->hDC;
3894     TRACE("Returning dc %p.\n", *dc);
3895
3896     return WINED3D_OK;
3897 }
3898
3899 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3900 {
3901     TRACE("surface %p, dc %p.\n", surface, dc);
3902
3903     if (!(surface->flags & SFLAG_DCINUSE))
3904         return WINEDDERR_NODC;
3905
3906     if (surface->hDC != dc)
3907     {
3908         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3909                 dc, surface->hDC);
3910         return WINEDDERR_NODC;
3911     }
3912
3913     /* Copy the contents of the DIB over to the PBO. */
3914     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
3915         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3916
3917     /* We locked first, so unlock now. */
3918     wined3d_surface_unmap(surface);
3919
3920     surface->flags &= ~SFLAG_DCINUSE;
3921
3922     return WINED3D_OK;
3923 }
3924
3925 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3926 {
3927     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3928
3929     if (flags)
3930     {
3931         static UINT once;
3932         if (!once++)
3933             FIXME("Ignoring flags %#x.\n", flags);
3934         else
3935             WARN("Ignoring flags %#x.\n", flags);
3936     }
3937
3938     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3939     {
3940         ERR("Not supported on swapchain surfaces.\n");
3941         return WINEDDERR_NOTFLIPPABLE;
3942     }
3943
3944     /* Flipping is only supported on render targets and overlays. */
3945     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3946     {
3947         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3948         return WINEDDERR_NOTFLIPPABLE;
3949     }
3950
3951     flip_surface(surface, override);
3952
3953     /* Update overlays if they're visible. */
3954     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3955         return surface_draw_overlay(surface);
3956
3957     return WINED3D_OK;
3958 }
3959
3960 /* Do not call while under the GL lock. */
3961 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3962 {
3963     struct wined3d_device *device = surface->resource.device;
3964
3965     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3966
3967     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3968     {
3969         struct wined3d_texture *texture = surface->container.u.texture;
3970
3971         TRACE("Passing to container (%p).\n", texture);
3972         texture->texture_ops->texture_preload(texture, srgb);
3973     }
3974     else
3975     {
3976         struct wined3d_context *context;
3977
3978         TRACE("(%p) : About to load surface\n", surface);
3979
3980         /* TODO: Use already acquired context when possible. */
3981         context = context_acquire(device, NULL);
3982
3983         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3984
3985         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3986         {
3987             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3988             GLclampf tmp;
3989             tmp = 0.9f;
3990             ENTER_GL();
3991             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3992             LEAVE_GL();
3993         }
3994
3995         context_release(context);
3996     }
3997 }
3998
3999 BOOL surface_init_sysmem(struct wined3d_surface *surface)
4000 {
4001     if (!surface->resource.allocatedMemory)
4002     {
4003         if (!surface->resource.heapMemory)
4004         {
4005             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
4006                     surface->resource.size + RESOURCE_ALIGNMENT)))
4007             {
4008                 ERR("Failed to allocate memory.\n");
4009                 return FALSE;
4010             }
4011         }
4012         else if (!(surface->flags & SFLAG_CLIENT))
4013         {
4014             ERR("Surface %p has heapMemory %p and flags %#x.\n",
4015                     surface, surface->resource.heapMemory, surface->flags);
4016         }
4017
4018         surface->resource.allocatedMemory =
4019             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
4020     }
4021     else
4022     {
4023         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
4024     }
4025
4026     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
4027
4028     return TRUE;
4029 }
4030
4031 /* Read the framebuffer back into the surface */
4032 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4033 {
4034     struct wined3d_device *device = surface->resource.device;
4035     const struct wined3d_gl_info *gl_info;
4036     struct wined3d_context *context;
4037     BYTE *mem;
4038     GLint fmt;
4039     GLint type;
4040     BYTE *row, *top, *bottom;
4041     int i;
4042     BOOL bpp;
4043     RECT local_rect;
4044     BOOL srcIsUpsideDown;
4045     GLint rowLen = 0;
4046     GLint skipPix = 0;
4047     GLint skipRow = 0;
4048
4049     context = context_acquire(device, surface);
4050     context_apply_blit_state(context, device);
4051     gl_info = context->gl_info;
4052
4053     ENTER_GL();
4054
4055     /* Select the correct read buffer, and give some debug output.
4056      * There is no need to keep track of the current read buffer or reset it, every part of the code
4057      * that reads sets the read buffer as desired.
4058      */
4059     if (surface_is_offscreen(surface))
4060     {
4061         /* Mapping the primary render target which is not on a swapchain.
4062          * Read from the back buffer. */
4063         TRACE("Mapping offscreen render target.\n");
4064         glReadBuffer(device->offscreenBuffer);
4065         srcIsUpsideDown = TRUE;
4066     }
4067     else
4068     {
4069         /* Onscreen surfaces are always part of a swapchain */
4070         GLenum buffer = surface_get_gl_buffer(surface);
4071         TRACE("Mapping %#x buffer.\n", buffer);
4072         glReadBuffer(buffer);
4073         checkGLcall("glReadBuffer");
4074         srcIsUpsideDown = FALSE;
4075     }
4076
4077     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4078     if (!rect)
4079     {
4080         local_rect.left = 0;
4081         local_rect.top = 0;
4082         local_rect.right = surface->resource.width;
4083         local_rect.bottom = surface->resource.height;
4084     }
4085     else
4086     {
4087         local_rect = *rect;
4088     }
4089     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4090
4091     switch (surface->resource.format->id)
4092     {
4093         case WINED3DFMT_P8_UINT:
4094         {
4095             if (primary_render_target_is_p8(device))
4096             {
4097                 /* In case of P8 render targets the index is stored in the alpha component */
4098                 fmt = GL_ALPHA;
4099                 type = GL_UNSIGNED_BYTE;
4100                 mem = dest;
4101                 bpp = surface->resource.format->byte_count;
4102             }
4103             else
4104             {
4105                 /* GL can't return palettized data, so read ARGB pixels into a
4106                  * separate block of memory and convert them into palettized format
4107                  * in software. Slow, but if the app means to use palettized render
4108                  * targets and locks it...
4109                  *
4110                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4111                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4112                  * for the color channels when palettizing the colors.
4113                  */
4114                 fmt = GL_RGB;
4115                 type = GL_UNSIGNED_BYTE;
4116                 pitch *= 3;
4117                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4118                 if (!mem)
4119                 {
4120                     ERR("Out of memory\n");
4121                     LEAVE_GL();
4122                     return;
4123                 }
4124                 bpp = surface->resource.format->byte_count * 3;
4125             }
4126         }
4127         break;
4128
4129         default:
4130             mem = dest;
4131             fmt = surface->resource.format->glFormat;
4132             type = surface->resource.format->glType;
4133             bpp = surface->resource.format->byte_count;
4134     }
4135
4136     if (surface->flags & SFLAG_PBO)
4137     {
4138         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4139         checkGLcall("glBindBufferARB");
4140         if (mem)
4141         {
4142             ERR("mem not null for pbo -- unexpected\n");
4143             mem = NULL;
4144         }
4145     }
4146
4147     /* Save old pixel store pack state */
4148     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4149     checkGLcall("glGetIntegerv");
4150     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4151     checkGLcall("glGetIntegerv");
4152     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4153     checkGLcall("glGetIntegerv");
4154
4155     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4156     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4157     checkGLcall("glPixelStorei");
4158     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4159     checkGLcall("glPixelStorei");
4160     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4161     checkGLcall("glPixelStorei");
4162
4163     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4164             local_rect.right - local_rect.left,
4165             local_rect.bottom - local_rect.top,
4166             fmt, type, mem);
4167     checkGLcall("glReadPixels");
4168
4169     /* Reset previous pixel store pack state */
4170     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4171     checkGLcall("glPixelStorei");
4172     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4173     checkGLcall("glPixelStorei");
4174     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4175     checkGLcall("glPixelStorei");
4176
4177     if (surface->flags & SFLAG_PBO)
4178     {
4179         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4180         checkGLcall("glBindBufferARB");
4181
4182         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4183          * to get a pointer to it and perform the flipping in software. This is a lot
4184          * faster than calling glReadPixels for each line. In case we want more speed
4185          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4186         if (!srcIsUpsideDown)
4187         {
4188             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4189             checkGLcall("glBindBufferARB");
4190
4191             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4192             checkGLcall("glMapBufferARB");
4193         }
4194     }
4195
4196     /* TODO: Merge this with the palettization loop below for P8 targets */
4197     if(!srcIsUpsideDown) {
4198         UINT len, off;
4199         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4200             Flip the lines in software */
4201         len = (local_rect.right - local_rect.left) * bpp;
4202         off = local_rect.left * bpp;
4203
4204         row = HeapAlloc(GetProcessHeap(), 0, len);
4205         if(!row) {
4206             ERR("Out of memory\n");
4207             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4208                 HeapFree(GetProcessHeap(), 0, mem);
4209             LEAVE_GL();
4210             return;
4211         }
4212
4213         top = mem + pitch * local_rect.top;
4214         bottom = mem + pitch * (local_rect.bottom - 1);
4215         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4216             memcpy(row, top + off, len);
4217             memcpy(top + off, bottom + off, len);
4218             memcpy(bottom + off, row, len);
4219             top += pitch;
4220             bottom -= pitch;
4221         }
4222         HeapFree(GetProcessHeap(), 0, row);
4223
4224         /* Unmap the temp PBO buffer */
4225         if (surface->flags & SFLAG_PBO)
4226         {
4227             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4228             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4229         }
4230     }
4231
4232     LEAVE_GL();
4233     context_release(context);
4234
4235     /* For P8 textures we need to perform an inverse palette lookup. This is
4236      * done by searching for a palette index which matches the RGB value.
4237      * Note this isn't guaranteed to work when there are multiple entries for
4238      * the same color but we have no choice. In case of P8 render targets,
4239      * the index is stored in the alpha component so no conversion is needed. */
4240     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4241     {
4242         const PALETTEENTRY *pal = NULL;
4243         DWORD width = pitch / 3;
4244         int x, y, c;
4245
4246         if (surface->palette)
4247         {
4248             pal = surface->palette->palents;
4249         }
4250         else
4251         {
4252             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4253             HeapFree(GetProcessHeap(), 0, mem);
4254             return;
4255         }
4256
4257         for(y = local_rect.top; y < local_rect.bottom; y++) {
4258             for(x = local_rect.left; x < local_rect.right; x++) {
4259                 /*                      start              lines            pixels      */
4260                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4261                 const BYTE *green = blue  + 1;
4262                 const BYTE *red = green + 1;
4263
4264                 for(c = 0; c < 256; c++) {
4265                     if(*red   == pal[c].peRed   &&
4266                        *green == pal[c].peGreen &&
4267                        *blue  == pal[c].peBlue)
4268                     {
4269                         *((BYTE *) dest + y * width + x) = c;
4270                         break;
4271                     }
4272                 }
4273             }
4274         }
4275         HeapFree(GetProcessHeap(), 0, mem);
4276     }
4277 }
4278
4279 /* Read the framebuffer contents into a texture. Note that this function
4280  * doesn't do any kind of flipping. Using this on an onscreen surface will
4281  * result in a flipped D3D texture. */
4282 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4283 {
4284     struct wined3d_device *device = surface->resource.device;
4285     struct wined3d_context *context;
4286
4287     context = context_acquire(device, surface);
4288     device_invalidate_state(device, STATE_FRAMEBUFFER);
4289
4290     surface_prepare_texture(surface, context, srgb);
4291     surface_bind_and_dirtify(surface, context, srgb);
4292
4293     TRACE("Reading back offscreen render target %p.\n", surface);
4294
4295     ENTER_GL();
4296
4297     if (surface_is_offscreen(surface))
4298         glReadBuffer(device->offscreenBuffer);
4299     else
4300         glReadBuffer(surface_get_gl_buffer(surface));
4301     checkGLcall("glReadBuffer");
4302
4303     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4304             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4305     checkGLcall("glCopyTexSubImage2D");
4306
4307     LEAVE_GL();
4308
4309     context_release(context);
4310 }
4311
4312 /* Context activation is done by the caller. */
4313 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4314         struct wined3d_context *context, BOOL srgb)
4315 {
4316     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4317     enum wined3d_conversion_type convert;
4318     struct wined3d_format format;
4319
4320     if (surface->flags & alloc_flag) return;
4321
4322     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4323     if (convert != WINED3D_CT_NONE || format.convert)
4324         surface->flags |= SFLAG_CONVERTED;
4325     else surface->flags &= ~SFLAG_CONVERTED;
4326
4327     surface_bind_and_dirtify(surface, context, srgb);
4328     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4329     surface->flags |= alloc_flag;
4330 }
4331
4332 /* Context activation is done by the caller. */
4333 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4334 {
4335     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4336     {
4337         struct wined3d_texture *texture = surface->container.u.texture;
4338         UINT sub_count = texture->level_count * texture->layer_count;
4339         UINT i;
4340
4341         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4342
4343         for (i = 0; i < sub_count; ++i)
4344         {
4345             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4346             surface_prepare_texture_internal(s, context, srgb);
4347         }
4348
4349         return;
4350     }
4351
4352     surface_prepare_texture_internal(surface, context, srgb);
4353 }
4354
4355 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4356 {
4357     if (multisample)
4358     {
4359         if (surface->rb_multisample)
4360             return;
4361
4362         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4363         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4364         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4365                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4366         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4367     }
4368     else
4369     {
4370         if (surface->rb_resolved)
4371             return;
4372
4373         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4374         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4375         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4376                 surface->pow2Width, surface->pow2Height);
4377         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4378     }
4379 }
4380
4381 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4382         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4383 {
4384     struct wined3d_device *device = surface->resource.device;
4385     UINT pitch = wined3d_surface_get_pitch(surface);
4386     const struct wined3d_gl_info *gl_info;
4387     struct wined3d_context *context;
4388     RECT local_rect;
4389     UINT w, h;
4390
4391     surface_get_rect(surface, rect, &local_rect);
4392
4393     mem += local_rect.top * pitch + local_rect.left * bpp;
4394     w = local_rect.right - local_rect.left;
4395     h = local_rect.bottom - local_rect.top;
4396
4397     /* Activate the correct context for the render target */
4398     context = context_acquire(device, surface);
4399     context_apply_blit_state(context, device);
4400     gl_info = context->gl_info;
4401
4402     ENTER_GL();
4403
4404     if (!surface_is_offscreen(surface))
4405     {
4406         GLenum buffer = surface_get_gl_buffer(surface);
4407         TRACE("Unlocking %#x buffer.\n", buffer);
4408         context_set_draw_buffer(context, buffer);
4409
4410         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4411         glPixelZoom(1.0f, -1.0f);
4412     }
4413     else
4414     {
4415         /* Primary offscreen render target */
4416         TRACE("Offscreen render target.\n");
4417         context_set_draw_buffer(context, device->offscreenBuffer);
4418
4419         glPixelZoom(1.0f, 1.0f);
4420     }
4421
4422     glRasterPos3i(local_rect.left, local_rect.top, 1);
4423     checkGLcall("glRasterPos3i");
4424
4425     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4426     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4427
4428     if (surface->flags & SFLAG_PBO)
4429     {
4430         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4431         checkGLcall("glBindBufferARB");
4432     }
4433
4434     glDrawPixels(w, h, fmt, type, mem);
4435     checkGLcall("glDrawPixels");
4436
4437     if (surface->flags & SFLAG_PBO)
4438     {
4439         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4440         checkGLcall("glBindBufferARB");
4441     }
4442
4443     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4444     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4445
4446     LEAVE_GL();
4447
4448     if (wined3d_settings.strict_draw_ordering
4449             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4450             && surface->container.u.swapchain->front_buffer == surface))
4451         wglFlush();
4452
4453     context_release(context);
4454 }
4455
4456 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck, BOOL use_texturing,
4457         struct wined3d_format *format, enum wined3d_conversion_type *conversion_type)
4458 {
4459     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4460     const struct wined3d_device *device = surface->resource.device;
4461     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4462     BOOL blit_supported = FALSE;
4463
4464     /* Copy the default values from the surface. Below we might perform fixups */
4465     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4466     *format = *surface->resource.format;
4467     *conversion_type = WINED3D_CT_NONE;
4468
4469     /* Ok, now look if we have to do any conversion */
4470     switch (surface->resource.format->id)
4471     {
4472         case WINED3DFMT_P8_UINT:
4473             /* Below the call to blit_supported is disabled for Wine 1.2
4474              * because the function isn't operating correctly yet. At the
4475              * moment 8-bit blits are handled in software and if certain GL
4476              * extensions are around, surface conversion is performed at
4477              * upload time. The blit_supported call recognizes it as a
4478              * destination fixup. This type of upload 'fixup' and 8-bit to
4479              * 8-bit blits need to be handled by the blit_shader.
4480              * TODO: get rid of this #if 0. */
4481 #if 0
4482             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4483                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4484                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4485 #endif
4486             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4487
4488             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4489              * texturing. Further also use conversion in case of color keying.
4490              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4491              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4492              * conflicts with this.
4493              */
4494             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4495                     || colorkey_active || !use_texturing)
4496             {
4497                 format->glFormat = GL_RGBA;
4498                 format->glInternal = GL_RGBA;
4499                 format->glType = GL_UNSIGNED_BYTE;
4500                 format->conv_byte_count = 4;
4501                 if (colorkey_active)
4502                     *conversion_type = WINED3D_CT_PALETTED_CK;
4503                 else
4504                     *conversion_type = WINED3D_CT_PALETTED;
4505             }
4506             break;
4507
4508         case WINED3DFMT_B2G3R3_UNORM:
4509             /* **********************
4510                 GL_UNSIGNED_BYTE_3_3_2
4511                 ********************** */
4512             if (colorkey_active) {
4513                 /* This texture format will never be used.. So do not care about color keying
4514                     up until the point in time it will be needed :-) */
4515                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4516             }
4517             break;
4518
4519         case WINED3DFMT_B5G6R5_UNORM:
4520             if (colorkey_active)
4521             {
4522                 *conversion_type = WINED3D_CT_CK_565;
4523                 format->glFormat = GL_RGBA;
4524                 format->glInternal = GL_RGB5_A1;
4525                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4526                 format->conv_byte_count = 2;
4527             }
4528             break;
4529
4530         case WINED3DFMT_B5G5R5X1_UNORM:
4531             if (colorkey_active)
4532             {
4533                 *conversion_type = WINED3D_CT_CK_5551;
4534                 format->glFormat = GL_BGRA;
4535                 format->glInternal = GL_RGB5_A1;
4536                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4537                 format->conv_byte_count = 2;
4538             }
4539             break;
4540
4541         case WINED3DFMT_B8G8R8_UNORM:
4542             if (colorkey_active)
4543             {
4544                 *conversion_type = WINED3D_CT_CK_RGB24;
4545                 format->glFormat = GL_RGBA;
4546                 format->glInternal = GL_RGBA8;
4547                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4548                 format->conv_byte_count = 4;
4549             }
4550             break;
4551
4552         case WINED3DFMT_B8G8R8X8_UNORM:
4553             if (colorkey_active)
4554             {
4555                 *conversion_type = WINED3D_CT_RGB32_888;
4556                 format->glFormat = GL_RGBA;
4557                 format->glInternal = GL_RGBA8;
4558                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4559                 format->conv_byte_count = 4;
4560             }
4561             break;
4562
4563         case WINED3DFMT_B8G8R8A8_UNORM:
4564             if (colorkey_active)
4565             {
4566                 *conversion_type = WINED3D_CT_CK_ARGB32;
4567                 format->conv_byte_count = 4;
4568             }
4569             break;
4570
4571         default:
4572             break;
4573     }
4574
4575     if (*conversion_type != WINED3D_CT_NONE)
4576     {
4577         format->rtInternal = format->glInternal;
4578         format->glGammaInternal = format->glInternal;
4579     }
4580
4581     return WINED3D_OK;
4582 }
4583
4584 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4585 {
4586     /* FIXME: Is this really how color keys are supposed to work? I think it
4587      * makes more sense to compare the individual channels. */
4588     return color >= color_key->color_space_low_value
4589             && color <= color_key->color_space_high_value;
4590 }
4591
4592 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4593 {
4594     const struct wined3d_device *device = surface->resource.device;
4595     const struct wined3d_palette *pal = surface->palette;
4596     BOOL index_in_alpha = FALSE;
4597     unsigned int i;
4598
4599     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4600      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4601      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4602      * duplicate entries. Store the color key in the unused alpha component to speed the
4603      * download up and to make conversion unneeded. */
4604     index_in_alpha = primary_render_target_is_p8(device);
4605
4606     if (!pal)
4607     {
4608         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4609         if (index_in_alpha)
4610         {
4611             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4612              * there's no palette at this time. */
4613             for (i = 0; i < 256; i++) table[i][3] = i;
4614         }
4615     }
4616     else
4617     {
4618         TRACE("Using surface palette %p\n", pal);
4619         /* Get the surface's palette */
4620         for (i = 0; i < 256; ++i)
4621         {
4622             table[i][0] = pal->palents[i].peRed;
4623             table[i][1] = pal->palents[i].peGreen;
4624             table[i][2] = pal->palents[i].peBlue;
4625
4626             /* When index_in_alpha is set the palette index is stored in the
4627              * alpha component. In case of a readback we can then read
4628              * GL_ALPHA. Color keying is handled in BltOverride using a
4629              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4630              * color key itself is passed to glAlphaFunc in other cases the
4631              * alpha component of pixels that should be masked away is set to 0. */
4632             if (index_in_alpha)
4633                 table[i][3] = i;
4634             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4635                 table[i][3] = 0x00;
4636             else if (pal->flags & WINEDDPCAPS_ALPHA)
4637                 table[i][3] = pal->palents[i].peFlags;
4638             else
4639                 table[i][3] = 0xFF;
4640         }
4641     }
4642 }
4643
4644 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width, UINT height,
4645         UINT outpitch, enum wined3d_conversion_type conversion_type, struct wined3d_surface *surface)
4646 {
4647     const BYTE *source;
4648     BYTE *dest;
4649
4650     TRACE("src %p, dst %p, pitch %u, width %u, height %u, outpitch %u, conversion_type %#x, surface %p.\n",
4651             src, dst, pitch, width, height, outpitch, conversion_type, surface);
4652
4653     switch (conversion_type)
4654     {
4655         case WINED3D_CT_NONE:
4656         {
4657             memcpy(dst, src, pitch * height);
4658             break;
4659         }
4660
4661         case WINED3D_CT_PALETTED:
4662         case WINED3D_CT_PALETTED_CK:
4663         {
4664             BYTE table[256][4];
4665             unsigned int x, y;
4666
4667             d3dfmt_p8_init_palette(surface, table, (conversion_type == WINED3D_CT_PALETTED_CK));
4668
4669             for (y = 0; y < height; y++)
4670             {
4671                 source = src + pitch * y;
4672                 dest = dst + outpitch * y;
4673                 /* This is an 1 bpp format, using the width here is fine */
4674                 for (x = 0; x < width; x++) {
4675                     BYTE color = *source++;
4676                     *dest++ = table[color][0];
4677                     *dest++ = table[color][1];
4678                     *dest++ = table[color][2];
4679                     *dest++ = table[color][3];
4680                 }
4681             }
4682         }
4683         break;
4684
4685         case WINED3D_CT_CK_565:
4686         {
4687             /* Converting the 565 format in 5551 packed to emulate color-keying.
4688
4689               Note : in all these conversion, it would be best to average the averaging
4690                       pixels to get the color of the pixel that will be color-keyed to
4691                       prevent 'color bleeding'. This will be done later on if ever it is
4692                       too visible.
4693
4694               Note2: Nvidia documents say that their driver does not support alpha + color keying
4695                      on the same surface and disables color keying in such a case
4696             */
4697             unsigned int x, y;
4698             const WORD *Source;
4699             WORD *Dest;
4700
4701             TRACE("Color keyed 565\n");
4702
4703             for (y = 0; y < height; y++) {
4704                 Source = (const WORD *)(src + y * pitch);
4705                 Dest = (WORD *) (dst + y * outpitch);
4706                 for (x = 0; x < width; x++ ) {
4707                     WORD color = *Source++;
4708                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4709                     if (!color_in_range(&surface->src_blt_color_key, color))
4710                         *Dest |= 0x0001;
4711                     Dest++;
4712                 }
4713             }
4714         }
4715         break;
4716
4717         case WINED3D_CT_CK_5551:
4718         {
4719             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4720             unsigned int x, y;
4721             const WORD *Source;
4722             WORD *Dest;
4723             TRACE("Color keyed 5551\n");
4724             for (y = 0; y < height; y++) {
4725                 Source = (const WORD *)(src + y * pitch);
4726                 Dest = (WORD *) (dst + y * outpitch);
4727                 for (x = 0; x < width; x++ ) {
4728                     WORD color = *Source++;
4729                     *Dest = color;
4730                     if (!color_in_range(&surface->src_blt_color_key, color))
4731                         *Dest |= (1 << 15);
4732                     else
4733                         *Dest &= ~(1 << 15);
4734                     Dest++;
4735                 }
4736             }
4737         }
4738         break;
4739
4740         case WINED3D_CT_CK_RGB24:
4741         {
4742             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4743             unsigned int x, y;
4744             for (y = 0; y < height; y++)
4745             {
4746                 source = src + pitch * y;
4747                 dest = dst + outpitch * y;
4748                 for (x = 0; x < width; x++) {
4749                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4750                     DWORD dstcolor = color << 8;
4751                     if (!color_in_range(&surface->src_blt_color_key, color))
4752                         dstcolor |= 0xff;
4753                     *(DWORD*)dest = dstcolor;
4754                     source += 3;
4755                     dest += 4;
4756                 }
4757             }
4758         }
4759         break;
4760
4761         case WINED3D_CT_RGB32_888:
4762         {
4763             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4764             unsigned int x, y;
4765             for (y = 0; y < height; y++)
4766             {
4767                 source = src + pitch * y;
4768                 dest = dst + outpitch * y;
4769                 for (x = 0; x < width; x++) {
4770                     DWORD color = 0xffffff & *(const DWORD*)source;
4771                     DWORD dstcolor = color << 8;
4772                     if (!color_in_range(&surface->src_blt_color_key, color))
4773                         dstcolor |= 0xff;
4774                     *(DWORD*)dest = dstcolor;
4775                     source += 4;
4776                     dest += 4;
4777                 }
4778             }
4779         }
4780         break;
4781
4782         case WINED3D_CT_CK_ARGB32:
4783         {
4784             unsigned int x, y;
4785             for (y = 0; y < height; ++y)
4786             {
4787                 source = src + pitch * y;
4788                 dest = dst + outpitch * y;
4789                 for (x = 0; x < width; ++x)
4790                 {
4791                     DWORD color = *(const DWORD *)source;
4792                     if (color_in_range(&surface->src_blt_color_key, color))
4793                         color &= ~0xff000000;
4794                     *(DWORD*)dest = color;
4795                     source += 4;
4796                     dest += 4;
4797                 }
4798             }
4799         }
4800         break;
4801
4802         default:
4803             ERR("Unsupported conversion type %#x.\n", conversion_type);
4804     }
4805     return WINED3D_OK;
4806 }
4807
4808 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4809 {
4810     /* Flip the surface contents */
4811     /* Flip the DC */
4812     {
4813         HDC tmp;
4814         tmp = front->hDC;
4815         front->hDC = back->hDC;
4816         back->hDC = tmp;
4817     }
4818
4819     /* Flip the DIBsection */
4820     {
4821         HBITMAP tmp = front->dib.DIBsection;
4822         front->dib.DIBsection = back->dib.DIBsection;
4823         back->dib.DIBsection = tmp;
4824     }
4825
4826     /* Flip the surface data */
4827     {
4828         void* tmp;
4829
4830         tmp = front->dib.bitmap_data;
4831         front->dib.bitmap_data = back->dib.bitmap_data;
4832         back->dib.bitmap_data = tmp;
4833
4834         tmp = front->resource.allocatedMemory;
4835         front->resource.allocatedMemory = back->resource.allocatedMemory;
4836         back->resource.allocatedMemory = tmp;
4837
4838         tmp = front->resource.heapMemory;
4839         front->resource.heapMemory = back->resource.heapMemory;
4840         back->resource.heapMemory = tmp;
4841     }
4842
4843     /* Flip the PBO */
4844     {
4845         GLuint tmp_pbo = front->pbo;
4846         front->pbo = back->pbo;
4847         back->pbo = tmp_pbo;
4848     }
4849
4850     /* Flip the opengl texture */
4851     {
4852         GLuint tmp;
4853
4854         tmp = back->texture_name;
4855         back->texture_name = front->texture_name;
4856         front->texture_name = tmp;
4857
4858         tmp = back->texture_name_srgb;
4859         back->texture_name_srgb = front->texture_name_srgb;
4860         front->texture_name_srgb = tmp;
4861
4862         tmp = back->rb_multisample;
4863         back->rb_multisample = front->rb_multisample;
4864         front->rb_multisample = tmp;
4865
4866         tmp = back->rb_resolved;
4867         back->rb_resolved = front->rb_resolved;
4868         front->rb_resolved = tmp;
4869
4870         resource_unload(&back->resource);
4871         resource_unload(&front->resource);
4872     }
4873
4874     {
4875         DWORD tmp_flags = back->flags;
4876         back->flags = front->flags;
4877         front->flags = tmp_flags;
4878     }
4879 }
4880
4881 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4882  * pixel copy calls. */
4883 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4884         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4885 {
4886     struct wined3d_device *device = dst_surface->resource.device;
4887     float xrel, yrel;
4888     UINT row;
4889     struct wined3d_context *context;
4890     BOOL upsidedown = FALSE;
4891     RECT dst_rect = *dst_rect_in;
4892
4893     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4894      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4895      */
4896     if(dst_rect.top > dst_rect.bottom) {
4897         UINT tmp = dst_rect.bottom;
4898         dst_rect.bottom = dst_rect.top;
4899         dst_rect.top = tmp;
4900         upsidedown = TRUE;
4901     }
4902
4903     context = context_acquire(device, src_surface);
4904     context_apply_blit_state(context, device);
4905     surface_internal_preload(dst_surface, SRGB_RGB);
4906     ENTER_GL();
4907
4908     /* Bind the target texture */
4909     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4910     if (surface_is_offscreen(src_surface))
4911     {
4912         TRACE("Reading from an offscreen target\n");
4913         upsidedown = !upsidedown;
4914         glReadBuffer(device->offscreenBuffer);
4915     }
4916     else
4917     {
4918         glReadBuffer(surface_get_gl_buffer(src_surface));
4919     }
4920     checkGLcall("glReadBuffer");
4921
4922     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4923     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4924
4925     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4926     {
4927         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4928
4929         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4930             ERR("Texture filtering not supported in direct blit.\n");
4931     }
4932     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4933             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4934     {
4935         ERR("Texture filtering not supported in direct blit\n");
4936     }
4937
4938     if (upsidedown
4939             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4940             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4941     {
4942         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4943
4944         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4945                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4946                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4947                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4948     }
4949     else
4950     {
4951         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4952         /* I have to process this row by row to swap the image,
4953          * otherwise it would be upside down, so stretching in y direction
4954          * doesn't cost extra time
4955          *
4956          * However, stretching in x direction can be avoided if not necessary
4957          */
4958         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4959             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4960             {
4961                 /* Well, that stuff works, but it's very slow.
4962                  * find a better way instead
4963                  */
4964                 UINT col;
4965
4966                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4967                 {
4968                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4969                             dst_rect.left + col /* x offset */, row /* y offset */,
4970                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4971                 }
4972             }
4973             else
4974             {
4975                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4976                         dst_rect.left /* x offset */, row /* y offset */,
4977                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4978             }
4979         }
4980     }
4981     checkGLcall("glCopyTexSubImage2D");
4982
4983     LEAVE_GL();
4984     context_release(context);
4985
4986     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4987      * path is never entered
4988      */
4989     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4990 }
4991
4992 /* Uses the hardware to stretch and flip the image */
4993 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4994         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4995 {
4996     struct wined3d_device *device = dst_surface->resource.device;
4997     struct wined3d_swapchain *src_swapchain = NULL;
4998     GLuint src, backup = 0;
4999     float left, right, top, bottom; /* Texture coordinates */
5000     UINT fbwidth = src_surface->resource.width;
5001     UINT fbheight = src_surface->resource.height;
5002     struct wined3d_context *context;
5003     GLenum drawBuffer = GL_BACK;
5004     GLenum texture_target;
5005     BOOL noBackBufferBackup;
5006     BOOL src_offscreen;
5007     BOOL upsidedown = FALSE;
5008     RECT dst_rect = *dst_rect_in;
5009
5010     TRACE("Using hwstretch blit\n");
5011     /* Activate the Proper context for reading from the source surface, set it up for blitting */
5012     context = context_acquire(device, src_surface);
5013     context_apply_blit_state(context, device);
5014     surface_internal_preload(dst_surface, SRGB_RGB);
5015
5016     src_offscreen = surface_is_offscreen(src_surface);
5017     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
5018     if (!noBackBufferBackup && !src_surface->texture_name)
5019     {
5020         /* Get it a description */
5021         surface_internal_preload(src_surface, SRGB_RGB);
5022     }
5023     ENTER_GL();
5024
5025     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
5026      * This way we don't have to wait for the 2nd readback to finish to leave this function.
5027      */
5028     if (context->aux_buffers >= 2)
5029     {
5030         /* Got more than one aux buffer? Use the 2nd aux buffer */
5031         drawBuffer = GL_AUX1;
5032     }
5033     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
5034     {
5035         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5036         drawBuffer = GL_AUX0;
5037     }
5038
5039     if(noBackBufferBackup) {
5040         glGenTextures(1, &backup);
5041         checkGLcall("glGenTextures");
5042         context_bind_texture(context, GL_TEXTURE_2D, backup);
5043         texture_target = GL_TEXTURE_2D;
5044     } else {
5045         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5046          * we are reading from the back buffer, the backup can be used as source texture
5047          */
5048         texture_target = src_surface->texture_target;
5049         context_bind_texture(context, texture_target, src_surface->texture_name);
5050         glEnable(texture_target);
5051         checkGLcall("glEnable(texture_target)");
5052
5053         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5054         src_surface->flags &= ~SFLAG_INTEXTURE;
5055     }
5056
5057     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5058      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5059      */
5060     if(dst_rect.top > dst_rect.bottom) {
5061         UINT tmp = dst_rect.bottom;
5062         dst_rect.bottom = dst_rect.top;
5063         dst_rect.top = tmp;
5064         upsidedown = TRUE;
5065     }
5066
5067     if (src_offscreen)
5068     {
5069         TRACE("Reading from an offscreen target\n");
5070         upsidedown = !upsidedown;
5071         glReadBuffer(device->offscreenBuffer);
5072     }
5073     else
5074     {
5075         glReadBuffer(surface_get_gl_buffer(src_surface));
5076     }
5077
5078     /* TODO: Only back up the part that will be overwritten */
5079     glCopyTexSubImage2D(texture_target, 0,
5080                         0, 0 /* read offsets */,
5081                         0, 0,
5082                         fbwidth,
5083                         fbheight);
5084
5085     checkGLcall("glCopyTexSubImage2D");
5086
5087     /* No issue with overriding these - the sampler is dirty due to blit usage */
5088     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5089             wined3d_gl_mag_filter(magLookup, filter));
5090     checkGLcall("glTexParameteri");
5091     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5092             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5093     checkGLcall("glTexParameteri");
5094
5095     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5096         src_swapchain = src_surface->container.u.swapchain;
5097     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5098     {
5099         src = backup ? backup : src_surface->texture_name;
5100     }
5101     else
5102     {
5103         glReadBuffer(GL_FRONT);
5104         checkGLcall("glReadBuffer(GL_FRONT)");
5105
5106         glGenTextures(1, &src);
5107         checkGLcall("glGenTextures(1, &src)");
5108         context_bind_texture(context, GL_TEXTURE_2D, src);
5109
5110         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5111          * out for power of 2 sizes
5112          */
5113         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5114                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5115         checkGLcall("glTexImage2D");
5116         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5117                             0, 0 /* read offsets */,
5118                             0, 0,
5119                             fbwidth,
5120                             fbheight);
5121
5122         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5123         checkGLcall("glTexParameteri");
5124         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5125         checkGLcall("glTexParameteri");
5126
5127         glReadBuffer(GL_BACK);
5128         checkGLcall("glReadBuffer(GL_BACK)");
5129
5130         if(texture_target != GL_TEXTURE_2D) {
5131             glDisable(texture_target);
5132             glEnable(GL_TEXTURE_2D);
5133             texture_target = GL_TEXTURE_2D;
5134         }
5135     }
5136     checkGLcall("glEnd and previous");
5137
5138     left = src_rect->left;
5139     right = src_rect->right;
5140
5141     if (!upsidedown)
5142     {
5143         top = src_surface->resource.height - src_rect->top;
5144         bottom = src_surface->resource.height - src_rect->bottom;
5145     }
5146     else
5147     {
5148         top = src_surface->resource.height - src_rect->bottom;
5149         bottom = src_surface->resource.height - src_rect->top;
5150     }
5151
5152     if (src_surface->flags & SFLAG_NORMCOORD)
5153     {
5154         left /= src_surface->pow2Width;
5155         right /= src_surface->pow2Width;
5156         top /= src_surface->pow2Height;
5157         bottom /= src_surface->pow2Height;
5158     }
5159
5160     /* draw the source texture stretched and upside down. The correct surface is bound already */
5161     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5162     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5163
5164     context_set_draw_buffer(context, drawBuffer);
5165     glReadBuffer(drawBuffer);
5166
5167     glBegin(GL_QUADS);
5168         /* bottom left */
5169         glTexCoord2f(left, bottom);
5170         glVertex2i(0, 0);
5171
5172         /* top left */
5173         glTexCoord2f(left, top);
5174         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5175
5176         /* top right */
5177         glTexCoord2f(right, top);
5178         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5179
5180         /* bottom right */
5181         glTexCoord2f(right, bottom);
5182         glVertex2i(dst_rect.right - dst_rect.left, 0);
5183     glEnd();
5184     checkGLcall("glEnd and previous");
5185
5186     if (texture_target != dst_surface->texture_target)
5187     {
5188         glDisable(texture_target);
5189         glEnable(dst_surface->texture_target);
5190         texture_target = dst_surface->texture_target;
5191     }
5192
5193     /* Now read the stretched and upside down image into the destination texture */
5194     context_bind_texture(context, texture_target, dst_surface->texture_name);
5195     glCopyTexSubImage2D(texture_target,
5196                         0,
5197                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5198                         0, 0, /* We blitted the image to the origin */
5199                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5200     checkGLcall("glCopyTexSubImage2D");
5201
5202     if(drawBuffer == GL_BACK) {
5203         /* Write the back buffer backup back */
5204         if(backup) {
5205             if(texture_target != GL_TEXTURE_2D) {
5206                 glDisable(texture_target);
5207                 glEnable(GL_TEXTURE_2D);
5208                 texture_target = GL_TEXTURE_2D;
5209             }
5210             context_bind_texture(context, GL_TEXTURE_2D, backup);
5211         }
5212         else
5213         {
5214             if (texture_target != src_surface->texture_target)
5215             {
5216                 glDisable(texture_target);
5217                 glEnable(src_surface->texture_target);
5218                 texture_target = src_surface->texture_target;
5219             }
5220             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5221         }
5222
5223         glBegin(GL_QUADS);
5224             /* top left */
5225             glTexCoord2f(0.0f, 0.0f);
5226             glVertex2i(0, fbheight);
5227
5228             /* bottom left */
5229             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5230             glVertex2i(0, 0);
5231
5232             /* bottom right */
5233             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5234                     (float)fbheight / (float)src_surface->pow2Height);
5235             glVertex2i(fbwidth, 0);
5236
5237             /* top right */
5238             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5239             glVertex2i(fbwidth, fbheight);
5240         glEnd();
5241     }
5242     glDisable(texture_target);
5243     checkGLcall("glDisable(texture_target)");
5244
5245     /* Cleanup */
5246     if (src != src_surface->texture_name && src != backup)
5247     {
5248         glDeleteTextures(1, &src);
5249         checkGLcall("glDeleteTextures(1, &src)");
5250     }
5251     if(backup) {
5252         glDeleteTextures(1, &backup);
5253         checkGLcall("glDeleteTextures(1, &backup)");
5254     }
5255
5256     LEAVE_GL();
5257
5258     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5259
5260     context_release(context);
5261
5262     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5263      * path is never entered
5264      */
5265     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5266 }
5267
5268 /* Front buffer coordinates are always full screen coordinates, but our GL
5269  * drawable is limited to the window's client area. The sysmem and texture
5270  * copies do have the full screen size. Note that GL has a bottom-left
5271  * origin, while D3D has a top-left origin. */
5272 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5273 {
5274     UINT drawable_height;
5275
5276     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5277             && surface == surface->container.u.swapchain->front_buffer)
5278     {
5279         POINT offset = {0, 0};
5280         RECT windowsize;
5281
5282         ScreenToClient(window, &offset);
5283         OffsetRect(rect, offset.x, offset.y);
5284
5285         GetClientRect(window, &windowsize);
5286         drawable_height = windowsize.bottom - windowsize.top;
5287     }
5288     else
5289     {
5290         drawable_height = surface->resource.height;
5291     }
5292
5293     rect->top = drawable_height - rect->top;
5294     rect->bottom = drawable_height - rect->bottom;
5295 }
5296
5297 static void surface_blt_to_drawable(const struct wined3d_device *device,
5298         enum wined3d_texture_filter_type filter, BOOL color_key,
5299         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5300         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5301 {
5302     struct wined3d_context *context;
5303     RECT src_rect, dst_rect;
5304
5305     src_rect = *src_rect_in;
5306     dst_rect = *dst_rect_in;
5307
5308     /* Make sure the surface is up-to-date. This should probably use
5309      * surface_load_location() and worry about the destination surface too,
5310      * unless we're overwriting it completely. */
5311     surface_internal_preload(src_surface, SRGB_RGB);
5312
5313     /* Activate the destination context, set it up for blitting */
5314     context = context_acquire(device, dst_surface);
5315     context_apply_blit_state(context, device);
5316
5317     if (!surface_is_offscreen(dst_surface))
5318         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5319
5320     device->blitter->set_shader(device->blit_priv, context, src_surface);
5321
5322     ENTER_GL();
5323
5324     if (color_key)
5325     {
5326         glEnable(GL_ALPHA_TEST);
5327         checkGLcall("glEnable(GL_ALPHA_TEST)");
5328
5329         /* When the primary render target uses P8, the alpha component
5330          * contains the palette index. Which means that the colorkey is one of
5331          * the palette entries. In other cases pixels that should be masked
5332          * away have alpha set to 0. */
5333         if (primary_render_target_is_p8(device))
5334             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5335         else
5336             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5337         checkGLcall("glAlphaFunc");
5338     }
5339     else
5340     {
5341         glDisable(GL_ALPHA_TEST);
5342         checkGLcall("glDisable(GL_ALPHA_TEST)");
5343     }
5344
5345     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5346
5347     if (color_key)
5348     {
5349         glDisable(GL_ALPHA_TEST);
5350         checkGLcall("glDisable(GL_ALPHA_TEST)");
5351     }
5352
5353     LEAVE_GL();
5354
5355     /* Leave the opengl state valid for blitting */
5356     device->blitter->unset_shader(context->gl_info);
5357
5358     if (wined3d_settings.strict_draw_ordering
5359             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5360             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5361         wglFlush(); /* Flush to ensure ordering across contexts. */
5362
5363     context_release(context);
5364 }
5365
5366 /* Do not call while under the GL lock. */
5367 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5368 {
5369     struct wined3d_device *device = s->resource.device;
5370     const struct blit_shader *blitter;
5371
5372     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5373             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5374     if (!blitter)
5375     {
5376         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5377         return WINED3DERR_INVALIDCALL;
5378     }
5379
5380     return blitter->color_fill(device, s, rect, color);
5381 }
5382
5383 /* Do not call while under the GL lock. */
5384 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5385         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5386         enum wined3d_texture_filter_type filter)
5387 {
5388     struct wined3d_device *device = dst_surface->resource.device;
5389     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5390     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5391
5392     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5393             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5394             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5395
5396     /* Get the swapchain. One of the surfaces has to be a primary surface */
5397     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5398     {
5399         WARN("Destination is in sysmem, rejecting gl blt\n");
5400         return WINED3DERR_INVALIDCALL;
5401     }
5402
5403     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5404         dstSwapchain = dst_surface->container.u.swapchain;
5405
5406     if (src_surface)
5407     {
5408         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5409         {
5410             WARN("Src is in sysmem, rejecting gl blt\n");
5411             return WINED3DERR_INVALIDCALL;
5412         }
5413
5414         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5415             srcSwapchain = src_surface->container.u.swapchain;
5416     }
5417
5418     /* Early sort out of cases where no render target is used */
5419     if (!dstSwapchain && !srcSwapchain
5420             && src_surface != device->fb.render_targets[0]
5421             && dst_surface != device->fb.render_targets[0])
5422     {
5423         TRACE("No surface is render target, not using hardware blit.\n");
5424         return WINED3DERR_INVALIDCALL;
5425     }
5426
5427     /* No destination color keying supported */
5428     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5429     {
5430         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5431         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5432         return WINED3DERR_INVALIDCALL;
5433     }
5434
5435     if (dstSwapchain && dstSwapchain == srcSwapchain)
5436     {
5437         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5438         return WINED3DERR_INVALIDCALL;
5439     }
5440
5441     if (dstSwapchain && srcSwapchain)
5442     {
5443         FIXME("Implement hardware blit between two different swapchains\n");
5444         return WINED3DERR_INVALIDCALL;
5445     }
5446
5447     if (dstSwapchain)
5448     {
5449         /* Handled with regular texture -> swapchain blit */
5450         if (src_surface == device->fb.render_targets[0])
5451             TRACE("Blit from active render target to a swapchain\n");
5452     }
5453     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5454     {
5455         FIXME("Implement blit from a swapchain to the active render target\n");
5456         return WINED3DERR_INVALIDCALL;
5457     }
5458
5459     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5460     {
5461         /* Blit from render target to texture */
5462         BOOL stretchx;
5463
5464         /* P8 read back is not implemented */
5465         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5466                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5467         {
5468             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5469             return WINED3DERR_INVALIDCALL;
5470         }
5471
5472         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5473         {
5474             TRACE("Color keying not supported by frame buffer to texture blit\n");
5475             return WINED3DERR_INVALIDCALL;
5476             /* Destination color key is checked above */
5477         }
5478
5479         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5480             stretchx = TRUE;
5481         else
5482             stretchx = FALSE;
5483
5484         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5485          * flip the image nor scale it.
5486          *
5487          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5488          * -> If the app wants a image width an unscaled width, copy it line per line
5489          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5490          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5491          *    back buffer. This is slower than reading line per line, thus not used for flipping
5492          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5493          *    pixel by pixel. */
5494         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5495                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5496         {
5497             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5498             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5499         }
5500         else
5501         {
5502             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5503             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5504         }
5505
5506         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5507         {
5508             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5509             dst_surface->resource.allocatedMemory = NULL;
5510             dst_surface->resource.heapMemory = NULL;
5511         }
5512         else
5513         {
5514             dst_surface->flags &= ~SFLAG_INSYSMEM;
5515         }
5516
5517         return WINED3D_OK;
5518     }
5519     else if (src_surface)
5520     {
5521         /* Blit from offscreen surface to render target */
5522         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5523         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5524
5525         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5526
5527         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5528                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5529                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5530         {
5531             FIXME("Unsupported blit operation falling back to software\n");
5532             return WINED3DERR_INVALIDCALL;
5533         }
5534
5535         /* Color keying: Check if we have to do a color keyed blt,
5536          * and if not check if a color key is activated.
5537          *
5538          * Just modify the color keying parameters in the surface and restore them afterwards
5539          * The surface keeps track of the color key last used to load the opengl surface.
5540          * PreLoad will catch the change to the flags and color key and reload if necessary.
5541          */
5542         if (flags & WINEDDBLT_KEYSRC)
5543         {
5544             /* Use color key from surface */
5545         }
5546         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5547         {
5548             /* Use color key from DDBltFx */
5549             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5550             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5551         }
5552         else
5553         {
5554             /* Do not use color key */
5555             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5556         }
5557
5558         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5559                 src_surface, src_rect, dst_surface, dst_rect);
5560
5561         /* Restore the color key parameters */
5562         src_surface->CKeyFlags = oldCKeyFlags;
5563         src_surface->src_blt_color_key = old_blt_key;
5564
5565         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5566
5567         return WINED3D_OK;
5568     }
5569
5570     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5571     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5572     return WINED3DERR_INVALIDCALL;
5573 }
5574
5575 /* GL locking is done by the caller */
5576 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5577         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5578 {
5579     struct wined3d_device *device = surface->resource.device;
5580     const struct wined3d_gl_info *gl_info = context->gl_info;
5581     GLint compare_mode = GL_NONE;
5582     struct blt_info info;
5583     GLint old_binding = 0;
5584     RECT rect;
5585
5586     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5587
5588     glDisable(GL_CULL_FACE);
5589     glDisable(GL_BLEND);
5590     glDisable(GL_ALPHA_TEST);
5591     glDisable(GL_SCISSOR_TEST);
5592     glDisable(GL_STENCIL_TEST);
5593     glEnable(GL_DEPTH_TEST);
5594     glDepthFunc(GL_ALWAYS);
5595     glDepthMask(GL_TRUE);
5596     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5597     glViewport(x, y, w, h);
5598     glDepthRange(0.0, 1.0);
5599
5600     SetRect(&rect, 0, h, w, 0);
5601     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5602     context_active_texture(context, context->gl_info, 0);
5603     glGetIntegerv(info.binding, &old_binding);
5604     glBindTexture(info.bind_target, texture);
5605     if (gl_info->supported[ARB_SHADOW])
5606     {
5607         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5608         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5609     }
5610
5611     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5612             gl_info, info.tex_type, &surface->ds_current_size);
5613
5614     glBegin(GL_TRIANGLE_STRIP);
5615     glTexCoord3fv(info.coords[0]);
5616     glVertex2f(-1.0f, -1.0f);
5617     glTexCoord3fv(info.coords[1]);
5618     glVertex2f(1.0f, -1.0f);
5619     glTexCoord3fv(info.coords[2]);
5620     glVertex2f(-1.0f, 1.0f);
5621     glTexCoord3fv(info.coords[3]);
5622     glVertex2f(1.0f, 1.0f);
5623     glEnd();
5624
5625     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5626     glBindTexture(info.bind_target, old_binding);
5627
5628     glPopAttrib();
5629
5630     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5631 }
5632
5633 void surface_modify_ds_location(struct wined3d_surface *surface,
5634         DWORD location, UINT w, UINT h)
5635 {
5636     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5637
5638     if (location & ~(SFLAG_LOCATIONS | SFLAG_DISCARDED))
5639         FIXME("Invalid location (%#x) specified.\n", location);
5640
5641     if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5642             || (!(surface->flags & SFLAG_INTEXTURE) && (location & SFLAG_INTEXTURE)))
5643     {
5644         if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5645         {
5646             TRACE("Passing to container.\n");
5647             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5648         }
5649     }
5650
5651     surface->ds_current_size.cx = w;
5652     surface->ds_current_size.cy = h;
5653     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_DISCARDED);
5654     surface->flags |= location;
5655 }
5656
5657 /* Context activation is done by the caller. */
5658 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5659 {
5660     struct wined3d_device *device = surface->resource.device;
5661     GLsizei w, h;
5662
5663     TRACE("surface %p, new location %#x.\n", surface, location);
5664
5665     /* TODO: Make this work for modes other than FBO */
5666     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5667
5668     if (!(surface->flags & location))
5669     {
5670         w = surface->ds_current_size.cx;
5671         h = surface->ds_current_size.cy;
5672         surface->ds_current_size.cx = 0;
5673         surface->ds_current_size.cy = 0;
5674     }
5675     else
5676     {
5677         w = surface->resource.width;
5678         h = surface->resource.height;
5679     }
5680
5681     if (surface->ds_current_size.cx == surface->resource.width
5682             && surface->ds_current_size.cy == surface->resource.height)
5683     {
5684         TRACE("Location (%#x) is already up to date.\n", location);
5685         return;
5686     }
5687
5688     if (surface->current_renderbuffer)
5689     {
5690         FIXME("Not supported with fixed up depth stencil.\n");
5691         return;
5692     }
5693
5694     if (surface->flags & SFLAG_DISCARDED)
5695     {
5696         TRACE("Surface was discarded, no need copy data.\n");
5697         switch (location)
5698         {
5699             case SFLAG_INTEXTURE:
5700                 surface_prepare_texture(surface, context, FALSE);
5701                 break;
5702             case SFLAG_INRB_MULTISAMPLE:
5703                 surface_prepare_rb(surface, context->gl_info, TRUE);
5704                 break;
5705             case SFLAG_INDRAWABLE:
5706                 /* Nothing to do */
5707                 break;
5708             default:
5709                 FIXME("Unhandled location %#x\n", location);
5710         }
5711         surface->flags &= ~SFLAG_DISCARDED;
5712         surface->flags |= location;
5713         surface->ds_current_size.cx = surface->resource.width;
5714         surface->ds_current_size.cy = surface->resource.height;
5715         return;
5716     }
5717
5718     if (!(surface->flags & SFLAG_LOCATIONS))
5719     {
5720         FIXME("No up to date depth stencil location.\n");
5721         surface->flags |= location;
5722         surface->ds_current_size.cx = surface->resource.width;
5723         surface->ds_current_size.cy = surface->resource.height;
5724         return;
5725     }
5726
5727     if (location == SFLAG_INTEXTURE)
5728     {
5729         GLint old_binding = 0;
5730         GLenum bind_target;
5731
5732         /* The render target is allowed to be smaller than the depth/stencil
5733          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5734          * than the offscreen surface. Don't overwrite the offscreen surface
5735          * with undefined data. */
5736         w = min(w, context->swapchain->desc.backbuffer_width);
5737         h = min(h, context->swapchain->desc.backbuffer_height);
5738
5739         TRACE("Copying onscreen depth buffer to depth texture.\n");
5740
5741         ENTER_GL();
5742
5743         if (!device->depth_blt_texture)
5744         {
5745             glGenTextures(1, &device->depth_blt_texture);
5746         }
5747
5748         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5749          * directly on the FBO texture. That's because we need to flip. */
5750         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5751                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5752         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5753         {
5754             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5755             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5756         }
5757         else
5758         {
5759             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5760             bind_target = GL_TEXTURE_2D;
5761         }
5762         glBindTexture(bind_target, device->depth_blt_texture);
5763         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5764          * internal format, because the internal format might include stencil
5765          * data. In principle we should copy stencil data as well, but unless
5766          * the driver supports stencil export it's hard to do, and doesn't
5767          * seem to be needed in practice. If the hardware doesn't support
5768          * writing stencil data, the glCopyTexImage2D() call might trigger
5769          * software fallbacks. */
5770         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5771         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5772         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5773         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5774         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5775         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5776         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5777         glBindTexture(bind_target, old_binding);
5778
5779         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5780                 NULL, surface, SFLAG_INTEXTURE);
5781         context_set_draw_buffer(context, GL_NONE);
5782         glReadBuffer(GL_NONE);
5783
5784         /* Do the actual blit */
5785         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5786         checkGLcall("depth_blt");
5787
5788         context_invalidate_state(context, STATE_FRAMEBUFFER);
5789
5790         LEAVE_GL();
5791
5792         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5793     }
5794     else if (location == SFLAG_INDRAWABLE)
5795     {
5796         TRACE("Copying depth texture to onscreen depth buffer.\n");
5797
5798         ENTER_GL();
5799
5800         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5801                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5802         surface_depth_blt(surface, context, surface->texture_name,
5803                 0, surface->pow2Height - h, w, h, surface->texture_target);
5804         checkGLcall("depth_blt");
5805
5806         context_invalidate_state(context, STATE_FRAMEBUFFER);
5807
5808         LEAVE_GL();
5809
5810         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5811     }
5812     else
5813     {
5814         ERR("Invalid location (%#x) specified.\n", location);
5815     }
5816
5817     surface->flags |= location;
5818     surface->ds_current_size.cx = surface->resource.width;
5819     surface->ds_current_size.cy = surface->resource.height;
5820 }
5821
5822 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5823 {
5824     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5825     struct wined3d_surface *overlay;
5826
5827     TRACE("surface %p, location %s, persistent %#x.\n",
5828             surface, debug_surflocation(location), persistent);
5829
5830     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5831             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5832             && (location & SFLAG_INDRAWABLE))
5833         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5834
5835     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5836             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5837         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5838
5839     if (persistent)
5840     {
5841         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5842                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5843         {
5844             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5845             {
5846                 TRACE("Passing to container.\n");
5847                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5848             }
5849         }
5850         surface->flags &= ~SFLAG_LOCATIONS;
5851         surface->flags |= location;
5852
5853         /* Redraw emulated overlays, if any */
5854         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5855         {
5856             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5857             {
5858                 surface_draw_overlay(overlay);
5859             }
5860         }
5861     }
5862     else
5863     {
5864         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5865         {
5866             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5867             {
5868                 TRACE("Passing to container\n");
5869                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5870             }
5871         }
5872         surface->flags &= ~location;
5873     }
5874
5875     if (!(surface->flags & SFLAG_LOCATIONS))
5876     {
5877         ERR("Surface %p does not have any up to date location.\n", surface);
5878     }
5879 }
5880
5881 static DWORD resource_access_from_location(DWORD location)
5882 {
5883     switch (location)
5884     {
5885         case SFLAG_INSYSMEM:
5886             return WINED3D_RESOURCE_ACCESS_CPU;
5887
5888         case SFLAG_INDRAWABLE:
5889         case SFLAG_INSRGBTEX:
5890         case SFLAG_INTEXTURE:
5891         case SFLAG_INRB_MULTISAMPLE:
5892         case SFLAG_INRB_RESOLVED:
5893             return WINED3D_RESOURCE_ACCESS_GPU;
5894
5895         default:
5896             FIXME("Unhandled location %#x.\n", location);
5897             return 0;
5898     }
5899 }
5900
5901 static void surface_load_sysmem(struct wined3d_surface *surface,
5902         const struct wined3d_gl_info *gl_info, const RECT *rect)
5903 {
5904     surface_prepare_system_memory(surface);
5905
5906     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5907         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5908
5909     /* Download the surface to system memory. */
5910     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5911     {
5912         struct wined3d_device *device = surface->resource.device;
5913         struct wined3d_context *context;
5914
5915         /* TODO: Use already acquired context when possible. */
5916         context = context_acquire(device, NULL);
5917
5918         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5919         surface_download_data(surface, gl_info);
5920
5921         context_release(context);
5922
5923         return;
5924     }
5925
5926     if (surface->flags & SFLAG_INDRAWABLE)
5927     {
5928         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5929                 wined3d_surface_get_pitch(surface));
5930         return;
5931     }
5932
5933     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5934             surface, surface->flags & SFLAG_LOCATIONS);
5935 }
5936
5937 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5938         const struct wined3d_gl_info *gl_info, const RECT *rect)
5939 {
5940     struct wined3d_device *device = surface->resource.device;
5941     enum wined3d_conversion_type convert;
5942     struct wined3d_format format;
5943     UINT byte_count;
5944     BYTE *mem;
5945
5946     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5947     {
5948         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5949         return WINED3DERR_INVALIDCALL;
5950     }
5951
5952     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5953         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5954
5955     if (surface->flags & SFLAG_INTEXTURE)
5956     {
5957         RECT r;
5958
5959         surface_get_rect(surface, rect, &r);
5960         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5961
5962         return WINED3D_OK;
5963     }
5964
5965     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5966     {
5967         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5968          * path through sysmem. */
5969         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5970     }
5971
5972     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5973
5974     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5975      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5976      * called. */
5977     if ((convert != WINED3D_CT_NONE) && (surface->flags & SFLAG_PBO))
5978     {
5979         struct wined3d_context *context;
5980
5981         TRACE("Removing the pbo attached to surface %p.\n", surface);
5982
5983         /* TODO: Use already acquired context when possible. */
5984         context = context_acquire(device, NULL);
5985
5986         surface_remove_pbo(surface, gl_info);
5987
5988         context_release(context);
5989     }
5990
5991     if ((convert != WINED3D_CT_NONE) && surface->resource.allocatedMemory)
5992     {
5993         UINT height = surface->resource.height;
5994         UINT width = surface->resource.width;
5995         UINT src_pitch, dst_pitch;
5996
5997         byte_count = format.conv_byte_count;
5998         src_pitch = wined3d_surface_get_pitch(surface);
5999
6000         /* Stick to the alignment for the converted surface too, makes it
6001          * easier to load the surface. */
6002         dst_pitch = width * byte_count;
6003         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6004
6005         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6006         {
6007             ERR("Out of memory (%u).\n", dst_pitch * height);
6008             return E_OUTOFMEMORY;
6009         }
6010
6011         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
6012                 src_pitch, width, height, dst_pitch, convert, surface);
6013
6014         surface->flags |= SFLAG_CONVERTED;
6015     }
6016     else
6017     {
6018         surface->flags &= ~SFLAG_CONVERTED;
6019         mem = surface->resource.allocatedMemory;
6020         byte_count = format.byte_count;
6021     }
6022
6023     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
6024
6025     /* Don't delete PBO memory. */
6026     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6027         HeapFree(GetProcessHeap(), 0, mem);
6028
6029     return WINED3D_OK;
6030 }
6031
6032 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6033         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6034 {
6035     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6036     struct wined3d_device *device = surface->resource.device;
6037     enum wined3d_conversion_type convert;
6038     struct wined3d_context *context;
6039     UINT width, src_pitch, dst_pitch;
6040     struct wined3d_bo_address data;
6041     struct wined3d_format format;
6042     POINT dst_point = {0, 0};
6043     BYTE *mem;
6044
6045     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6046             && surface_is_offscreen(surface)
6047             && (surface->flags & SFLAG_INDRAWABLE))
6048     {
6049         surface_load_fb_texture(surface, srgb);
6050
6051         return WINED3D_OK;
6052     }
6053
6054     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6055             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6056             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6057                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6058                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6059     {
6060         if (srgb)
6061             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6062                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6063         else
6064             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6065                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6066
6067         return WINED3D_OK;
6068     }
6069
6070     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6071             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6072             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6073                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6074                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6075     {
6076         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6077         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
6078         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6079
6080         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6081                 &rect, surface, dst_location, &rect);
6082
6083         return WINED3D_OK;
6084     }
6085
6086     /* Upload from system memory */
6087
6088     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6089             TRUE /* We will use textures */, &format, &convert);
6090
6091     if (srgb)
6092     {
6093         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6094         {
6095             /* Performance warning... */
6096             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6097             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6098         }
6099     }
6100     else
6101     {
6102         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6103         {
6104             /* Performance warning... */
6105             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6106             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6107         }
6108     }
6109
6110     if (!(surface->flags & SFLAG_INSYSMEM))
6111     {
6112         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6113         /* Lets hope we get it from somewhere... */
6114         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6115     }
6116
6117     /* TODO: Use already acquired context when possible. */
6118     context = context_acquire(device, NULL);
6119
6120     surface_prepare_texture(surface, context, srgb);
6121     surface_bind_and_dirtify(surface, context, srgb);
6122
6123     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6124     {
6125         surface->flags |= SFLAG_GLCKEY;
6126         surface->gl_color_key = surface->src_blt_color_key;
6127     }
6128     else surface->flags &= ~SFLAG_GLCKEY;
6129
6130     width = surface->resource.width;
6131     src_pitch = wined3d_surface_get_pitch(surface);
6132
6133     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6134      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6135      * called. */
6136     if ((convert != WINED3D_CT_NONE || format.convert) && (surface->flags & SFLAG_PBO))
6137     {
6138         TRACE("Removing the pbo attached to surface %p.\n", surface);
6139         surface_remove_pbo(surface, gl_info);
6140     }
6141
6142     if (format.convert)
6143     {
6144         /* This code is entered for texture formats which need a fixup. */
6145         UINT height = surface->resource.height;
6146
6147         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6148         dst_pitch = width * format.conv_byte_count;
6149         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6150
6151         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6152         {
6153             ERR("Out of memory (%u).\n", dst_pitch * height);
6154             context_release(context);
6155             return E_OUTOFMEMORY;
6156         }
6157         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6158         format.byte_count = format.conv_byte_count;
6159         src_pitch = dst_pitch;
6160     }
6161     else if (convert != WINED3D_CT_NONE && surface->resource.allocatedMemory)
6162     {
6163         /* This code is only entered for color keying fixups */
6164         UINT height = surface->resource.height;
6165
6166         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6167         dst_pitch = width * format.conv_byte_count;
6168         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6169
6170         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6171         {
6172             ERR("Out of memory (%u).\n", dst_pitch * height);
6173             context_release(context);
6174             return E_OUTOFMEMORY;
6175         }
6176         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6177                 width, height, dst_pitch, convert, surface);
6178         format.byte_count = format.conv_byte_count;
6179         src_pitch = dst_pitch;
6180     }
6181     else
6182     {
6183         mem = surface->resource.allocatedMemory;
6184     }
6185
6186     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6187     data.addr = mem;
6188     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6189
6190     context_release(context);
6191
6192     /* Don't delete PBO memory. */
6193     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6194         HeapFree(GetProcessHeap(), 0, mem);
6195
6196     return WINED3D_OK;
6197 }
6198
6199 static void surface_multisample_resolve(struct wined3d_surface *surface)
6200 {
6201     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6202
6203     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6204         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6205
6206     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6207             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6208 }
6209
6210 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6211 {
6212     struct wined3d_device *device = surface->resource.device;
6213     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6214     HRESULT hr;
6215
6216     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6217
6218     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6219     {
6220         if (location == SFLAG_INTEXTURE)
6221         {
6222             struct wined3d_context *context = context_acquire(device, NULL);
6223             surface_load_ds_location(surface, context, location);
6224             context_release(context);
6225             return WINED3D_OK;
6226         }
6227         else
6228         {
6229             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6230             return WINED3DERR_INVALIDCALL;
6231         }
6232     }
6233
6234     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6235         location = SFLAG_INTEXTURE;
6236
6237     if (surface->flags & location)
6238     {
6239         TRACE("Location already up to date.\n");
6240
6241         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6242                 && surface_need_pbo(surface, gl_info))
6243             surface_load_pbo(surface, gl_info);
6244
6245         return WINED3D_OK;
6246     }
6247
6248     if (WARN_ON(d3d_surface))
6249     {
6250         DWORD required_access = resource_access_from_location(location);
6251         if ((surface->resource.access_flags & required_access) != required_access)
6252             WARN("Operation requires %#x access, but surface only has %#x.\n",
6253                     required_access, surface->resource.access_flags);
6254     }
6255
6256     if (!(surface->flags & SFLAG_LOCATIONS))
6257     {
6258         ERR("Surface %p does not have any up to date location.\n", surface);
6259         surface->flags |= SFLAG_LOST;
6260         return WINED3DERR_DEVICELOST;
6261     }
6262
6263     switch (location)
6264     {
6265         case SFLAG_INSYSMEM:
6266             surface_load_sysmem(surface, gl_info, rect);
6267             break;
6268
6269         case SFLAG_INDRAWABLE:
6270             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6271                 return hr;
6272             break;
6273
6274         case SFLAG_INRB_RESOLVED:
6275             surface_multisample_resolve(surface);
6276             break;
6277
6278         case SFLAG_INTEXTURE:
6279         case SFLAG_INSRGBTEX:
6280             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6281                 return hr;
6282             break;
6283
6284         default:
6285             ERR("Don't know how to handle location %#x.\n", location);
6286             break;
6287     }
6288
6289     if (!rect)
6290     {
6291         surface->flags |= location;
6292
6293         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6294             surface_evict_sysmem(surface);
6295     }
6296
6297     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6298             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6299     {
6300         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6301     }
6302
6303     return WINED3D_OK;
6304 }
6305
6306 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6307 {
6308     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6309
6310     /* Not on a swapchain - must be offscreen */
6311     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6312
6313     /* The front buffer is always onscreen */
6314     if (surface == swapchain->front_buffer) return FALSE;
6315
6316     /* If the swapchain is rendered to an FBO, the backbuffer is
6317      * offscreen, otherwise onscreen */
6318     return swapchain->render_to_fbo;
6319 }
6320
6321 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Private data release hook of the fixed function blitter; intentionally
 * empty. */
/* Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6324
6325 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6326 /* Context activation is done by the caller. */
6327 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6328 {
6329     BYTE table[256][4];
6330     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6331
6332     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6333
6334     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6335     ENTER_GL();
6336     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6337     LEAVE_GL();
6338 }
6339
6340 /* Context activation is done by the caller. */
6341 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6342 {
6343     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6344
6345     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6346      * else the surface is converted in software at upload time in LoadLocation.
6347      */
6348     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6349             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6350         ffp_blit_p8_upload_palette(surface, context->gl_info);
6351
6352     ENTER_GL();
6353     glEnable(surface->texture_target);
6354     checkGLcall("glEnable(surface->texture_target)");
6355     LEAVE_GL();
6356     return WINED3D_OK;
6357 }
6358
6359 /* Context activation is done by the caller. */
6360 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6361 {
6362     ENTER_GL();
6363     glDisable(GL_TEXTURE_2D);
6364     checkGLcall("glDisable(GL_TEXTURE_2D)");
6365     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6366     {
6367         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6368         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6369     }
6370     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6371     {
6372         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6373         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6374     }
6375     LEAVE_GL();
6376 }
6377
6378 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6379         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6380         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6381 {
6382     enum complex_fixup src_fixup;
6383
6384     switch (blit_op)
6385     {
6386         case WINED3D_BLIT_OP_COLOR_BLIT:
6387             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6388                 return FALSE;
6389
6390             src_fixup = get_complex_fixup(src_format->color_fixup);
6391             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6392             {
6393                 TRACE("Checking support for fixup:\n");
6394                 dump_color_fixup_desc(src_format->color_fixup);
6395             }
6396
6397             if (!is_identity_fixup(dst_format->color_fixup))
6398             {
6399                 TRACE("Destination fixups are not supported\n");
6400                 return FALSE;
6401             }
6402
6403             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6404             {
6405                 TRACE("P8 fixup supported\n");
6406                 return TRUE;
6407             }
6408
6409             /* We only support identity conversions. */
6410             if (is_identity_fixup(src_format->color_fixup))
6411             {
6412                 TRACE("[OK]\n");
6413                 return TRUE;
6414             }
6415
6416             TRACE("[FAILED]\n");
6417             return FALSE;
6418
6419         case WINED3D_BLIT_OP_COLOR_FILL:
6420             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6421                 return FALSE;
6422
6423             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6424             {
6425                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6426                     return FALSE;
6427             }
6428             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6429             {
6430                 TRACE("Color fill not supported\n");
6431                 return FALSE;
6432             }
6433
6434             /* FIXME: We should reject color fills on formats with fixups,
6435              * but this would break P8 color fills for example. */
6436
6437             return TRUE;
6438
6439         case WINED3D_BLIT_OP_DEPTH_FILL:
6440             return TRUE;
6441
6442         default:
6443             TRACE("Unsupported blit_op=%d\n", blit_op);
6444             return FALSE;
6445     }
6446 }
6447
6448 /* Do not call while under the GL lock. */
6449 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6450         const RECT *dst_rect, const struct wined3d_color *color)
6451 {
6452     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6453     struct wined3d_fb_state fb = {&dst_surface, NULL};
6454
6455     return device_clear_render_targets(device, 1, &fb,
6456             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6457 }
6458
6459 /* Do not call while under the GL lock. */
6460 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6461         struct wined3d_surface *surface, const RECT *rect, float depth)
6462 {
6463     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6464     struct wined3d_fb_state fb = {NULL, surface};
6465
6466     return device_clear_render_targets(device, 0, &fb,
6467             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6468 }
6469
6470 const struct blit_shader ffp_blit =  {
6471     ffp_blit_alloc,
6472     ffp_blit_free,
6473     ffp_blit_set,
6474     ffp_blit_unset,
6475     ffp_blit_supported,
6476     ffp_blit_color_fill,
6477     ffp_blit_depth_fill,
6478 };
6479
6480 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6481 {
6482     return WINED3D_OK;
6483 }
6484
/* Private data release hook of the CPU blitter; intentionally empty. */
/* Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6489
6490 /* Context activation is done by the caller. */
6491 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6492 {
6493     return WINED3D_OK;
6494 }
6495
/* Teardown hook of the CPU blitter; intentionally empty. */
/* Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6500
6501 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6502         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6503         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6504 {
6505     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6506     {
6507         return TRUE;
6508     }
6509
6510     return FALSE;
6511 }
6512
6513 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6514         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6515         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6516 {
6517     UINT row_block_count;
6518     const BYTE *src_row;
6519     BYTE *dst_row;
6520     UINT x, y;
6521
6522     src_row = src_data;
6523     dst_row = dst_data;
6524
6525     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6526
6527     if (!flags)
6528     {
6529         for (y = 0; y < update_h; y += format->block_height)
6530         {
6531             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6532             src_row += src_pitch;
6533             dst_row += dst_pitch;
6534         }
6535
6536         return WINED3D_OK;
6537     }
6538
6539     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6540     {
6541         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6542
6543         switch (format->id)
6544         {
6545             case WINED3DFMT_DXT1:
6546                 for (y = 0; y < update_h; y += format->block_height)
6547                 {
6548                     struct block
6549                     {
6550                         WORD color[2];
6551                         BYTE control_row[4];
6552                     };
6553
6554                     const struct block *s = (const struct block *)src_row;
6555                     struct block *d = (struct block *)dst_row;
6556
6557                     for (x = 0; x < row_block_count; ++x)
6558                     {
6559                         d[x].color[0] = s[x].color[0];
6560                         d[x].color[1] = s[x].color[1];
6561                         d[x].control_row[0] = s[x].control_row[3];
6562                         d[x].control_row[1] = s[x].control_row[2];
6563                         d[x].control_row[2] = s[x].control_row[1];
6564                         d[x].control_row[3] = s[x].control_row[0];
6565                     }
6566                     src_row -= src_pitch;
6567                     dst_row += dst_pitch;
6568                 }
6569                 return WINED3D_OK;
6570
6571             case WINED3DFMT_DXT3:
6572                 for (y = 0; y < update_h; y += format->block_height)
6573                 {
6574                     struct block
6575                     {
6576                         WORD alpha_row[4];
6577                         WORD color[2];
6578                         BYTE control_row[4];
6579                     };
6580
6581                     const struct block *s = (const struct block *)src_row;
6582                     struct block *d = (struct block *)dst_row;
6583
6584                     for (x = 0; x < row_block_count; ++x)
6585                     {
6586                         d[x].alpha_row[0] = s[x].alpha_row[3];
6587                         d[x].alpha_row[1] = s[x].alpha_row[2];
6588                         d[x].alpha_row[2] = s[x].alpha_row[1];
6589                         d[x].alpha_row[3] = s[x].alpha_row[0];
6590                         d[x].color[0] = s[x].color[0];
6591                         d[x].color[1] = s[x].color[1];
6592                         d[x].control_row[0] = s[x].control_row[3];
6593                         d[x].control_row[1] = s[x].control_row[2];
6594                         d[x].control_row[2] = s[x].control_row[1];
6595                         d[x].control_row[3] = s[x].control_row[0];
6596                     }
6597                     src_row -= src_pitch;
6598                     dst_row += dst_pitch;
6599                 }
6600                 return WINED3D_OK;
6601
6602             default:
6603                 FIXME("Compressed flip not implemented for format %s.\n",
6604                         debug_d3dformat(format->id));
6605                 return E_NOTIMPL;
6606         }
6607     }
6608
6609     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6610             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6611
6612     return E_NOTIMPL;
6613 }
6614
6615 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6616         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6617         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6618 {
6619     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6620     const struct wined3d_format *src_format, *dst_format;
6621     struct wined3d_surface *orig_src = src_surface;
6622     struct wined3d_mapped_rect dst_map, src_map;
6623     const BYTE *sbase = NULL;
6624     HRESULT hr = WINED3D_OK;
6625     const BYTE *sbuf;
6626     BYTE *dbuf;
6627     int x, y;
6628
6629     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6630             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6631             flags, fx, debug_d3dtexturefiltertype(filter));
6632
6633     if (src_surface == dst_surface)
6634     {
6635         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6636         src_map = dst_map;
6637         src_format = dst_surface->resource.format;
6638         dst_format = src_format;
6639     }
6640     else
6641     {
6642         dst_format = dst_surface->resource.format;
6643         if (src_surface)
6644         {
6645             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6646             {
6647                 src_surface = surface_convert_format(src_surface, dst_format->id);
6648                 if (!src_surface)
6649                 {
6650                     /* The conv function writes a FIXME */
6651                     WARN("Cannot convert source surface format to dest format.\n");
6652                     goto release;
6653                 }
6654             }
6655             wined3d_surface_map(src_surface, &src_map, NULL, WINED3DLOCK_READONLY);
6656             src_format = src_surface->resource.format;
6657         }
6658         else
6659         {
6660             src_format = dst_format;
6661         }
6662
6663         wined3d_surface_map(dst_surface, &dst_map, dst_rect, 0);
6664     }
6665
6666     bpp = dst_surface->resource.format->byte_count;
6667     srcheight = src_rect->bottom - src_rect->top;
6668     srcwidth = src_rect->right - src_rect->left;
6669     dstheight = dst_rect->bottom - dst_rect->top;
6670     dstwidth = dst_rect->right - dst_rect->left;
6671     width = (dst_rect->right - dst_rect->left) * bpp;
6672
6673     if (src_surface)
6674         sbase = (BYTE *)src_map.data
6675                 + ((src_rect->top / src_format->block_height) * src_map.row_pitch)
6676                 + ((src_rect->left / src_format->block_width) * src_format->block_byte_count);
6677     if (src_surface != dst_surface)
6678         dbuf = dst_map.data;
6679     else
6680         dbuf = (BYTE *)dst_map.data
6681                 + ((dst_rect->top / dst_format->block_height) * dst_map.row_pitch)
6682                 + ((dst_rect->left / dst_format->block_width) * dst_format->block_byte_count);
6683
6684     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6685     {
6686         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6687
6688         if (src_surface == dst_surface)
6689         {
6690             FIXME("Only plain blits supported on compressed surfaces.\n");
6691             hr = E_NOTIMPL;
6692             goto release;
6693         }
6694
6695         if (srcheight != dstheight || srcwidth != dstwidth)
6696         {
6697             WARN("Stretching not supported on compressed surfaces.\n");
6698             hr = WINED3DERR_INVALIDCALL;
6699             goto release;
6700         }
6701
6702         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6703         {
6704             WARN("Rectangle not block-aligned.\n");
6705             hr = WINED3DERR_INVALIDCALL;
6706             goto release;
6707         }
6708
6709         hr = surface_cpu_blt_compressed(sbase, dbuf,
6710                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6711                 src_format, flags, fx);
6712         goto release;
6713     }
6714
6715     /* First, all the 'source-less' blits */
6716     if (flags & WINEDDBLT_COLORFILL)
6717     {
6718         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6719         flags &= ~WINEDDBLT_COLORFILL;
6720     }
6721
6722     if (flags & WINEDDBLT_DEPTHFILL)
6723     {
6724         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6725     }
6726     if (flags & WINEDDBLT_ROP)
6727     {
6728         /* Catch some degenerate cases here. */
6729         switch (fx->dwROP)
6730         {
6731             case BLACKNESS:
6732                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6733                 break;
6734             case 0xAA0029: /* No-op */
6735                 break;
6736             case WHITENESS:
6737                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6738                 break;
6739             case SRCCOPY: /* Well, we do that below? */
6740                 break;
6741             default:
6742                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6743                 goto error;
6744         }
6745         flags &= ~WINEDDBLT_ROP;
6746     }
6747     if (flags & WINEDDBLT_DDROPS)
6748     {
6749         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6750     }
6751     /* Now the 'with source' blits. */
6752     if (src_surface)
6753     {
6754         int sx, xinc, sy, yinc;
6755
6756         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6757             goto release;
6758
6759         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6760                 && (srcwidth != dstwidth || srcheight != dstheight))
6761         {
6762             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6763             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6764         }
6765
6766         xinc = (srcwidth << 16) / dstwidth;
6767         yinc = (srcheight << 16) / dstheight;
6768
6769         if (!flags)
6770         {
6771             /* No effects, we can cheat here. */
6772             if (dstwidth == srcwidth)
6773             {
6774                 if (dstheight == srcheight)
6775                 {
6776                     /* No stretching in either direction. This needs to be as
6777                      * fast as possible. */
6778                     sbuf = sbase;
6779
6780                     /* Check for overlapping surfaces. */
6781                     if (src_surface != dst_surface || dst_rect->top < src_rect->top
6782                             || dst_rect->right <= src_rect->left || src_rect->right <= dst_rect->left)
6783                     {
6784                         /* No overlap, or dst above src, so copy from top downwards. */
6785                         for (y = 0; y < dstheight; ++y)
6786                         {
6787                             memcpy(dbuf, sbuf, width);
6788                             sbuf += src_map.row_pitch;
6789                             dbuf += dst_map.row_pitch;
6790                         }
6791                     }
6792                     else if (dst_rect->top > src_rect->top)
6793                     {
6794                         /* Copy from bottom upwards. */
6795                         sbuf += src_map.row_pitch * dstheight;
6796                         dbuf += dst_map.row_pitch * dstheight;
6797                         for (y = 0; y < dstheight; ++y)
6798                         {
6799                             sbuf -= src_map.row_pitch;
6800                             dbuf -= dst_map.row_pitch;
6801                             memcpy(dbuf, sbuf, width);
6802                         }
6803                     }
6804                     else
6805                     {
6806                         /* Src and dst overlapping on the same line, use memmove. */
6807                         for (y = 0; y < dstheight; ++y)
6808                         {
6809                             memmove(dbuf, sbuf, width);
6810                             sbuf += src_map.row_pitch;
6811                             dbuf += dst_map.row_pitch;
6812                         }
6813                     }
6814                 }
6815                 else
6816                 {
6817                     /* Stretching in y direction only. */
6818                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6819                     {
6820                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6821                         memcpy(dbuf, sbuf, width);
6822                         dbuf += dst_map.row_pitch;
6823                     }
6824                 }
6825             }
6826             else
6827             {
6828                 /* Stretching in X direction. */
6829                 int last_sy = -1;
6830                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6831                 {
6832                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6833
6834                     if ((sy >> 16) == (last_sy >> 16))
6835                     {
6836                         /* This source row is the same as last source row -
6837                          * Copy the already stretched row. */
6838                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6839                     }
6840                     else
6841                     {
6842 #define STRETCH_ROW(type) \
6843 do { \
6844     const type *s = (const type *)sbuf; \
6845     type *d = (type *)dbuf; \
6846     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6847         d[x] = s[sx >> 16]; \
6848 } while(0)
6849
6850                         switch(bpp)
6851                         {
6852                             case 1:
6853                                 STRETCH_ROW(BYTE);
6854                                 break;
6855                             case 2:
6856                                 STRETCH_ROW(WORD);
6857                                 break;
6858                             case 4:
6859                                 STRETCH_ROW(DWORD);
6860                                 break;
6861                             case 3:
6862                             {
6863                                 const BYTE *s;
6864                                 BYTE *d = dbuf;
6865                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6866                                 {
6867                                     DWORD pixel;
6868
6869                                     s = sbuf + 3 * (sx >> 16);
6870                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6871                                     d[0] = (pixel      ) & 0xff;
6872                                     d[1] = (pixel >>  8) & 0xff;
6873                                     d[2] = (pixel >> 16) & 0xff;
6874                                     d += 3;
6875                                 }
6876                                 break;
6877                             }
6878                             default:
6879                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6880                                 hr = WINED3DERR_NOTAVAILABLE;
6881                                 goto error;
6882                         }
6883 #undef STRETCH_ROW
6884                     }
6885                     dbuf += dst_map.row_pitch;
6886                     last_sy = sy;
6887                 }
6888             }
6889         }
6890         else
6891         {
6892             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6893             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6894             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6895             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6896             {
6897                 /* The color keying flags are checked for correctness in ddraw */
6898                 if (flags & WINEDDBLT_KEYSRC)
6899                 {
6900                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6901                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6902                 }
6903                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6904                 {
6905                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6906                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6907                 }
6908
6909                 if (flags & WINEDDBLT_KEYDEST)
6910                 {
6911                     /* Destination color keys are taken from the source surface! */
6912                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6913                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6914                 }
6915                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6916                 {
6917                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6918                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6919                 }
6920
6921                 if (bpp == 1)
6922                 {
6923                     keymask = 0xff;
6924                 }
6925                 else
6926                 {
6927                     keymask = src_format->red_mask
6928                             | src_format->green_mask
6929                             | src_format->blue_mask;
6930                 }
6931                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6932             }
6933
6934             if (flags & WINEDDBLT_DDFX)
6935             {
6936                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6937                 LONG tmpxy;
6938                 dTopLeft     = dbuf;
6939                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6940                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6941                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6942
6943                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6944                 {
6945                     /* I don't think we need to do anything about this flag */
6946                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6947                 }
6948                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6949                 {
6950                     tmp          = dTopRight;
6951                     dTopRight    = dTopLeft;
6952                     dTopLeft     = tmp;
6953                     tmp          = dBottomRight;
6954                     dBottomRight = dBottomLeft;
6955                     dBottomLeft  = tmp;
6956                     dstxinc = dstxinc * -1;
6957                 }
6958                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6959                 {
6960                     tmp          = dTopLeft;
6961                     dTopLeft     = dBottomLeft;
6962                     dBottomLeft  = tmp;
6963                     tmp          = dTopRight;
6964                     dTopRight    = dBottomRight;
6965                     dBottomRight = tmp;
6966                     dstyinc = dstyinc * -1;
6967                 }
6968                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6969                 {
6970                     /* I don't think we need to do anything about this flag */
6971                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6972                 }
6973                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6974                 {
6975                     tmp          = dBottomRight;
6976                     dBottomRight = dTopLeft;
6977                     dTopLeft     = tmp;
6978                     tmp          = dBottomLeft;
6979                     dBottomLeft  = dTopRight;
6980                     dTopRight    = tmp;
6981                     dstxinc = dstxinc * -1;
6982                     dstyinc = dstyinc * -1;
6983                 }
6984                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6985                 {
6986                     tmp          = dTopLeft;
6987                     dTopLeft     = dBottomLeft;
6988                     dBottomLeft  = dBottomRight;
6989                     dBottomRight = dTopRight;
6990                     dTopRight    = tmp;
6991                     tmpxy   = dstxinc;
6992                     dstxinc = dstyinc;
6993                     dstyinc = tmpxy;
6994                     dstxinc = dstxinc * -1;
6995                 }
6996                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6997                 {
6998                     tmp          = dTopLeft;
6999                     dTopLeft     = dTopRight;
7000                     dTopRight    = dBottomRight;
7001                     dBottomRight = dBottomLeft;
7002                     dBottomLeft  = tmp;
7003                     tmpxy   = dstxinc;
7004                     dstxinc = dstyinc;
7005                     dstyinc = tmpxy;
7006                     dstyinc = dstyinc * -1;
7007                 }
7008                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
7009                 {
7010                     /* I don't think we need to do anything about this flag */
7011                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
7012                 }
7013                 dbuf = dTopLeft;
7014                 flags &= ~(WINEDDBLT_DDFX);
7015             }
7016
7017 #define COPY_COLORKEY_FX(type) \
7018 do { \
7019     const type *s; \
7020     type *d = (type *)dbuf, *dx, tmp; \
7021     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7022     { \
7023         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7024         dx = d; \
7025         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7026         { \
7027             tmp = s[sx >> 16]; \
7028             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7029                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7030             { \
7031                 dx[0] = tmp; \
7032             } \
7033             dx = (type *)(((BYTE *)dx) + dstxinc); \
7034         } \
7035         d = (type *)(((BYTE *)d) + dstyinc); \
7036     } \
7037 } while(0)
7038
7039             switch (bpp)
7040             {
7041                 case 1:
7042                     COPY_COLORKEY_FX(BYTE);
7043                     break;
7044                 case 2:
7045                     COPY_COLORKEY_FX(WORD);
7046                     break;
7047                 case 4:
7048                     COPY_COLORKEY_FX(DWORD);
7049                     break;
7050                 case 3:
7051                 {
7052                     const BYTE *s;
7053                     BYTE *d = dbuf, *dx;
7054                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7055                     {
7056                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7057                         dx = d;
7058                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7059                         {
7060                             DWORD pixel, dpixel = 0;
7061                             s = sbuf + 3 * (sx>>16);
7062                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7063                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7064                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7065                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7066                             {
7067                                 dx[0] = (pixel      ) & 0xff;
7068                                 dx[1] = (pixel >>  8) & 0xff;
7069                                 dx[2] = (pixel >> 16) & 0xff;
7070                             }
7071                             dx += dstxinc;
7072                         }
7073                         d += dstyinc;
7074                     }
7075                     break;
7076                 }
7077                 default:
7078                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7079                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7080                     hr = WINED3DERR_NOTAVAILABLE;
7081                     goto error;
7082 #undef COPY_COLORKEY_FX
7083             }
7084         }
7085     }
7086
7087 error:
7088     if (flags && FIXME_ON(d3d_surface))
7089     {
7090         FIXME("\tUnsupported flags: %#x.\n", flags);
7091     }
7092
7093 release:
7094     wined3d_surface_unmap(dst_surface);
7095     if (src_surface && src_surface != dst_surface)
7096         wined3d_surface_unmap(src_surface);
7097     /* Release the converted surface, if any. */
7098     if (src_surface && src_surface != orig_src)
7099         wined3d_surface_decref(src_surface);
7100
7101     return hr;
7102 }
7103
7104 /* Do not call while under the GL lock. */
7105 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7106         const RECT *dst_rect, const struct wined3d_color *color)
7107 {
7108     static const RECT src_rect;
7109     WINEDDBLTFX BltFx;
7110
7111     memset(&BltFx, 0, sizeof(BltFx));
7112     BltFx.dwSize = sizeof(BltFx);
7113     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7114     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7115             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7116 }
7117
7118 /* Do not call while under the GL lock. */
7119 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7120         struct wined3d_surface *surface, const RECT *rect, float depth)
7121 {
7122     FIXME("Depth filling not implemented by cpu_blit.\n");
7123     return WINED3DERR_INVALIDCALL;
7124 }
7125
7126 const struct blit_shader cpu_blit =  {
7127     cpu_blit_alloc,
7128     cpu_blit_free,
7129     cpu_blit_set,
7130     cpu_blit_unset,
7131     cpu_blit_supported,
7132     cpu_blit_color_fill,
7133     cpu_blit_depth_fill,
7134 };
7135
7136 static HRESULT surface_init(struct wined3d_surface *surface, enum wined3d_surface_type surface_type, UINT alignment,
7137         UINT width, UINT height, UINT level, enum wined3d_multisample_type multisample_type,
7138         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7139         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7140 {
7141     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7142     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7143     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7144     unsigned int resource_size;
7145     HRESULT hr;
7146
7147     if (multisample_quality > 0)
7148     {
7149         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7150         multisample_quality = 0;
7151     }
7152
7153     /* Quick lockable sanity check.
7154      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7155      * this function is too deep to need to care about things like this.
7156      * Levels need to be checked too, since they all affect what can be done. */
7157     switch (pool)
7158     {
7159         case WINED3D_POOL_SCRATCH:
7160             if (!lockable)
7161             {
7162                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7163                         "which are mutually exclusive, setting lockable to TRUE.\n");
7164                 lockable = TRUE;
7165             }
7166             break;
7167
7168         case WINED3D_POOL_SYSTEM_MEM:
7169             if (!lockable)
7170                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7171             break;
7172
7173         case WINED3D_POOL_MANAGED:
7174             if (usage & WINED3DUSAGE_DYNAMIC)
7175                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7176             break;
7177
7178         case WINED3D_POOL_DEFAULT:
7179             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7180                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7181             break;
7182
7183         default:
7184             FIXME("Unknown pool %#x.\n", pool);
7185             break;
7186     };
7187
7188     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7189         FIXME("Trying to create a render target that isn't in the default pool.\n");
7190
7191     /* FIXME: Check that the format is supported by the device. */
7192
7193     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7194     if (!resource_size)
7195         return WINED3DERR_INVALIDCALL;
7196
7197     surface->surface_type = surface_type;
7198
7199     switch (surface_type)
7200     {
7201         case WINED3D_SURFACE_TYPE_OPENGL:
7202             surface->surface_ops = &surface_ops;
7203             break;
7204
7205         case WINED3D_SURFACE_TYPE_GDI:
7206             surface->surface_ops = &gdi_surface_ops;
7207             break;
7208
7209         default:
7210             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7211             return WINED3DERR_INVALIDCALL;
7212     }
7213
7214     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7215             multisample_type, multisample_quality, usage, pool, width, height, 1,
7216             resource_size, parent, parent_ops, &surface_resource_ops);
7217     if (FAILED(hr))
7218     {
7219         WARN("Failed to initialize resource, returning %#x.\n", hr);
7220         return hr;
7221     }
7222
7223     /* "Standalone" surface. */
7224     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7225
7226     surface->texture_level = level;
7227     list_init(&surface->overlays);
7228
7229     /* Flags */
7230     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7231     if (flags & WINED3D_SURFACE_DISCARD)
7232         surface->flags |= SFLAG_DISCARD;
7233     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7234         surface->flags |= SFLAG_PIN_SYSMEM;
7235     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7236         surface->flags |= SFLAG_LOCKABLE;
7237     /* I'm not sure if this qualifies as a hack or as an optimization. It
7238      * seems reasonable to assume that lockable render targets will get
7239      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7240      * creation. However, the other reason we want to do this is that several
7241      * ddraw applications access surface memory while the surface isn't
7242      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7243      * future locks prevents these from crashing. */
7244     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7245         surface->flags |= SFLAG_DYNLOCK;
7246
7247     /* Mark the texture as dirty so that it gets loaded first time around. */
7248     surface_add_dirty_rect(surface, NULL);
7249     list_init(&surface->renderbuffers);
7250
7251     TRACE("surface %p, memory %p, size %u\n",
7252             surface, surface->resource.allocatedMemory, surface->resource.size);
7253
7254     /* Call the private setup routine */
7255     hr = surface->surface_ops->surface_private_setup(surface);
7256     if (FAILED(hr))
7257     {
7258         ERR("Private setup failed, returning %#x\n", hr);
7259         surface_cleanup(surface);
7260         return hr;
7261     }
7262
7263     /* Similar to lockable rendertargets above, creating the DIB section
7264      * during surface initialization prevents the sysmem pointer from changing
7265      * after a wined3d_surface_getdc() call. */
7266     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7267             && SUCCEEDED(surface_create_dib_section(surface)))
7268     {
7269         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7270         surface->resource.heapMemory = NULL;
7271         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7272     }
7273
7274     return hr;
7275 }
7276
7277 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7278         enum wined3d_format_id format_id, UINT level, DWORD usage, enum wined3d_pool pool,
7279         enum wined3d_multisample_type multisample_type, DWORD multisample_quality,
7280         enum wined3d_surface_type surface_type, DWORD flags, void *parent,
7281         const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7282 {
7283     struct wined3d_surface *object;
7284     HRESULT hr;
7285
7286     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7287             device, width, height, debug_d3dformat(format_id), level);
7288     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7289             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7290     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7291
7292     if (surface_type == WINED3D_SURFACE_TYPE_OPENGL && !device->adapter)
7293     {
7294         ERR("OpenGL surfaces are not available without OpenGL.\n");
7295         return WINED3DERR_NOTAVAILABLE;
7296     }
7297
7298     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7299     if (!object)
7300     {
7301         ERR("Failed to allocate surface memory.\n");
7302         return WINED3DERR_OUTOFVIDEOMEMORY;
7303     }
7304
7305     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7306             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7307     if (FAILED(hr))
7308     {
7309         WARN("Failed to initialize surface, returning %#x.\n", hr);
7310         HeapFree(GetProcessHeap(), 0, object);
7311         return hr;
7312     }
7313
7314     TRACE("Created surface %p.\n", object);
7315     *surface = object;
7316
7317     return hr;
7318 }